Example #1
def run_metrics_query(
    *,
    entity_key: EntityKey,
    select: List[Column],
    where: List[Condition],
    groupby: List[Column],
    projects: Sequence[Project],
    org_id: int,
    referrer: str,
) -> Mapping[str, Any]:
    # Round timestamp to minute to get cache efficiency:
    now = datetime.now().replace(second=0, microsecond=0)

    query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity(entity_key.value),
        select=select,
        groupby=groupby,
        where=[
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, [p.id for p in projects]),
            Condition(Column(TS_COL_QUERY), Op.GTE, now - timedelta(hours=24)),
            Condition(Column(TS_COL_QUERY), Op.LT, now),
        ] + where,
        granularity=Granularity(GRANULARITY),
    )
    result = raw_snql_query(query, referrer, use_cache=True)
    return result["data"]
Example #2
def _get_hash_for_parent_level(group: Group, id: int,
                               levels_overview: LevelsOverview) -> str:
    # If this is violated, there cannot be a 1:1 mapping between level and hash.
    assert 0 <= id < levels_overview.current_level

    # This cache never needs explicit invalidation because during every level
    # change, the group ID changes.
    #
    # No idea if the query is slow, caching just because I can.
    cache_key = f"group-parent-level-hash:{group.id}:{id}"

    return_hash: str = cache.get(cache_key)

    if return_hash is None:
        query = (Query("events", Entity("events")).set_select([
            Function("arrayElement", [Column("hierarchical_hashes"), id + 1],
                     "hash")
        ]).set_where(_get_group_filters(group)).set_limit(1))

        return_hash: str = get_path(snuba.raw_snql_query(query), "data", 0,
                                    "hash")  # type: ignore
        cache.set(cache_key, return_hash)

    assert return_hash
    return return_hash
Example #3
 def data_fn(offset, limit):
     if use_snql:
         trend_query.offset = Offset(offset)
         trend_query.limit = Limit(limit)
         result = raw_snql_query(
             trend_query.get_snql_query(),
             referrer="api.trends.get-percentage-change.wip-snql",
         )
         result = discover.transform_results(
             result, trend_query.function_alias_map, {}, None
         )
         return result
     else:
         return discover.query(
             selected_columns=selected_columns + trend_columns,
             query=query,
             params=params,
             orderby=orderby,
             offset=offset,
             limit=limit,
             referrer="api.trends.get-percentage-change",
             auto_fields=True,
             auto_aggregations=True,
             use_aggregate_conditions=True,
         )
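This data_fn(offset, limit) shape is what Sentry's offset-based paginators expect; the endpoint hands the function to a paginator rather than calling it directly. A stripped-down illustration of that driving loop follows (not the actual paginator: page_size, max_pages, and the dict/"data" handling are assumptions):

def iterate_pages(data_fn, page_size=100, max_pages=50):
    # Call data_fn(offset, limit) one page at a time; a short page (or the
    # max_pages guard) ends the iteration.
    offset = 0
    for _ in range(max_pages):
        page = data_fn(offset, page_size)
        rows = page["data"] if isinstance(page, dict) else page
        yield from rows
        if len(rows) < page_size:
            break
        offset += page_size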
Example #4
        def get_event_stats(
            query_columns: Sequence[str],
            query: str,
            params: Dict[str, str],
            rollup: int,
            zerofill_results: bool,
            comparison_delta: Optional[datetime] = None,
        ) -> SnubaTSResult:
            with sentry_sdk.start_span(
                    op="discover.discover",
                    description="timeseries.filter_transform"):
                builder = TimeseriesQueryBuilder(
                    Dataset.Discover,
                    params,
                    rollup,
                    query=query,
                    selected_columns=query_columns,
                    functions_acl=[
                        "array_join", "percentileArray", "sumArray"
                    ],
                )

                span_op_column = builder.resolve_function(
                    "array_join(spans_op)")
                span_group_column = builder.resolve_function(
                    "array_join(spans_group)")

                # Add spans.op and spans.group to the group by: we need them
                # in the query so the array join optimizer in Snuba can take
                # effect, but the TimeseriesQueryBuilder removes all
                # non-aggregates from the select clause.
                builder.groupby.extend([span_op_column, span_group_column])

                builder.add_conditions([
                    Condition(
                        Function("tuple", [span_op_column, span_group_column]),
                        Op.IN,
                        Function("tuple",
                                 [Function("tuple", [span.op, span.group])]),
                    ),
                ])

                snql_query = builder.get_snql_query()
                results = raw_snql_query(
                    snql_query,
                    "api.organization-events-spans-performance-stats")

            with sentry_sdk.start_span(
                    op="discover.discover",
                    description="timeseries.transform_results"):
                result = discover.zerofill(
                    results["data"],
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )

            return SnubaTSResult({"data": result}, params["start"],
                                 params["end"], rollup)
Example #5
def _get_snuba_query_data(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_name: _MetricName,
    metric_id: int,
    columns: Sequence[str],
    extra_conditions: Optional[List[Condition]] = None,
    remove_groupby: Optional[Set[Column]] = None,
) -> Generator[Tuple[_MetricName, _SnubaData], None, None]:
    """Get data from snuba"""
    if extra_conditions is None:
        extra_conditions = []

    if remove_groupby is None:
        remove_groupby = set()

    for query_type in ("series", "totals"):
        snuba_query = _get_snuba_query(
            org_id,
            query,
            entity_key,
            metric_id,
            columns,
            series=query_type == "series",
            extra_conditions=extra_conditions,
            remove_groupby=remove_groupby,
        )
        referrer = REFERRERS[metric_name][query_type]
        query_data = raw_snql_query(snuba_query, referrer=referrer)["data"]

        yield (metric_name, query_data)
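Because _get_snuba_query_data is a generator yielding one (metric_name, rows) pair for the "series" query and one for the "totals" query, callers have to drain it. A hypothetical consumer that simply flattens both result sets (the helper name is an illustration, not from the source):

def collect_metric_rows(org_id, query, entity_key, metric_name, metric_id, columns):
    # Drain both the "series" and the "totals" results, tagging each row with
    # the metric it belongs to.
    collected = []
    for name, rows in _get_snuba_query_data(
        org_id, query, entity_key, metric_name, metric_id, columns
    ):
        collected.extend((name, row) for row in rows)
    return collected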
Example #6
def _check_releases_have_health_data(
    organization_id: int,
    project_ids: List[int],
    release_versions: List[str],
    start: datetime,
    end: datetime,
) -> Set[str]:
    """
    Returns a set of all release versions that have health data within a given period of time.
    """
    if not release_versions:
        return set()

    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[Column("release")],
        groupby=[Column("release")],
        where=[
            Condition(Column("started"), Op.GTE, start),
            Condition(Column("started"), Op.LT, end),
            Condition(Column("org_id"), Op.EQ, organization_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("release"), Op.IN, release_versions),
        ],
    )
    data = snuba.raw_snql_query(query, referrer="snuba.sessions.check_releases_have_health_data")[
        "data"
    ]
    return {row["release"] for row in data}
Example #7
def get_levels_overview(group):
    query = (Query("events", Entity("events")).set_select([
        Column("primary_hash"),
        Function("max", [Function("length", [Column("hierarchical_hashes")])],
                 "num_levels"),
        _current_level_expr(group),
    ]).set_where(_get_group_filters(group)).set_groupby(
        [Column("primary_hash")]))

    res = snuba.raw_snql_query(
        query, referrer="api.group_hashes_levels.get_levels_overview")

    if not res["data"]:
        raise NoEvents()

    if len(res["data"]) > 1:
        raise MergedIssues()

    assert len(res["data"]) == 1

    fields = res["data"][0]

    if fields["num_levels"] <= 0:
        raise NotHierarchical()

    # TODO: Cache this if it takes too long. This is called from multiple
    # places, grouping overview and then again in the new-issues endpoint.

    return LevelsOverview(
        current_level=fields["current_level"] - 1,
        only_primary_hash=fields["primary_hash"],
        num_levels=fields["num_levels"],
    )
Example #8
def _get_full_hierarchical_hashes(group: Group, hash: str) -> Optional[Sequence[str]]:
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Column("hierarchical_hashes"),
            ]
        )
        .set_where(
            _get_group_filters(group)
            + [
                Condition(
                    Function(
                        "has",
                        [Column("hierarchical_hashes"), hash],
                    ),
                    Op.EQ,
                    1,
                ),
            ]
        )
    )

    data = snuba.raw_snql_query(query, referrer="group_split.get_full_hierarchical_hashes")["data"]
    if not data:
        return None

    return data[0]["hierarchical_hashes"]
Example #9
File: metrics.py, Project: KingDEV95/sentry
    def get_series(self, project: Project, query: QueryDefinition) -> dict:
        """Get time series for the given query"""

        intervals = list(query.get_intervals())

        snuba_queries = SnubaQueryBuilder(project, query).get_snuba_queries()
        results = {
            entity: {
                # TODO: Should we use cache?
                key: raw_snql_query(query,
                                    use_cache=False,
                                    referrer=f"api.metrics.{key}")
                for key, query in queries.items()
            }
            for entity, queries in snuba_queries.items()
        }

        converter = SnubaResultConverter(project.organization_id, query,
                                         intervals, results)

        return {
            "start": query.start,
            "end": query.end,
            "query": query.query,
            "intervals": intervals,
            "groups": converter.translate_results(),
        }
Example #10
File: metrics.py, Project: KingDEV95/sentry
            def _get_data(entity_key: EntityKey,
                          metric_name: str) -> Tuple[int, int]:
                total = 0
                crashed = 0
                metric_id = try_get_string_index(org_id, metric_name)
                if metric_id is not None:
                    where = conditions + [
                        Condition(Column("metric_id"), Op.EQ, metric_id),
                        Condition(Column("timestamp"), Op.LT, end),
                    ]
                    data = raw_snql_query(
                        Query(
                            dataset=Dataset.Metrics.value,
                            match=Entity(entity_key.value),
                            select=[Column("value")],
                            where=where,
                            groupby=[Column(status_key)],
                        ),
                        referrer=
                        "release_health.metrics.crash-free-breakdown.session",
                    )["data"]
                    for row in data:
                        if row[status_key] == status_init:
                            total = int(row["value"])
                        elif row[status_key] == status_crashed:
                            crashed = int(row["value"])

                return total, crashed
Example #11
def _get_snuba_query_data(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_key: MetricKey,
    metric_id: int,
    columns: List[SelectableExpression],
    limit_state: _LimitState,
    extra_conditions: Optional[List[Condition]] = None,
) -> Generator[Tuple[MetricKey, _SnubaData], None, None]:
    """Get data from snuba"""

    for query_type in ("totals", "series"):
        snuba_query = _get_snuba_query(
            org_id,
            query,
            entity_key,
            metric_id,
            columns,
            series=query_type == "series",
            limit_state=limit_state,
            extra_conditions=extra_conditions or [],
        )
        referrer = REFERRERS[metric_key][query_type]
        if snuba_query is None:
            query_data = []
        else:
            query_data = raw_snql_query(snuba_query, referrer=referrer)["data"]
            limit_state.update(snuba_query.groupby, query_data)

        yield (metric_key, query_data)
Example #12
def wip_snql_query(
    selected_columns,
    query,
    params,
    equations=None,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    Replacement API for query using snql. This function is still a work in
    progress and is not ready for use in production.
    """
    builder = QueryBuilder(
        Dataset.Discover,
        params,
        query=query,
        selected_columns=selected_columns,
        orderby=orderby,
        use_aggregate_conditions=use_aggregate_conditions,
        limit=limit,
    )
    snql_query = builder.get_snql_query()

    results = raw_snql_query(snql_query, referrer)
    return results
Example #13
def monitor_release_adoption(**kwargs):
    metrics.incr("sentry.tasks.monitor_release_adoption.start", sample_rate=1.0)
    # 1. Query snuba for all project ids that have sessions.
    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.aggregate_projects.loop", sample_rate=1.0
    ):
        aggregated_projects = defaultdict(list)
        start_time = time.time()
        offset = 0
        while (time.time() - start_time) < MAX_SECONDS:
            query = (
                Query(
                    dataset="sessions",
                    match=Entity("org_sessions"),
                    select=[
                        Column("org_id"),
                        Column("project_id"),
                    ],
                    groupby=[Column("org_id"), Column("project_id")],
                    where=[
                        Condition(
                            Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)
                        ),
                        Condition(Column("started"), Op.LT, datetime.utcnow()),
                    ],
                    granularity=Granularity(3600),
                    orderby=[
                        OrderBy(Column("org_id"), Direction.ASC),
                        OrderBy(Column("project_id"), Direction.ASC),
                    ],
                )
                .set_limit(CHUNK_SIZE + 1)
                .set_offset(offset)
            )
            data = snuba.raw_snql_query(query, referrer="tasks.monitor_release_adoption")["data"]
            count = len(data)
            more_results = count > CHUNK_SIZE
            offset += CHUNK_SIZE

            if more_results:
                data = data[:-1]

            for row in data:
                aggregated_projects[row["org_id"]].append(row["project_id"])

            if not more_results:
                break

        else:
            logger.info(
                "monitor_release_adoption.loop_timeout",
                sample_rate=1.0,
                extra={"offset": offset},
            )
    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.process_projects_with_sessions", sample_rate=1.0
    ):
        for org_id in aggregated_projects:
            process_projects_with_sessions.delay(org_id, aggregated_projects[org_id])
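The while loop above over-fetches by one row: it requests CHUNK_SIZE + 1 rows and, if it gets more than CHUNK_SIZE back, drops the extra row and continues at the next offset, while the surrounding time budget guards against runaway loops. Stripped of the Snuba specifics, the pattern looks roughly like this (fetch_page stands in for the raw_snql_query call and is hypothetical):

import time

def paginate_with_overfetch(fetch_page, chunk_size, max_seconds):
    # Request one extra row per page; getting it back means another page
    # exists, so drop it and advance the offset.
    start_time = time.time()
    offset = 0
    while (time.time() - start_time) < max_seconds:
        rows = fetch_page(offset=offset, limit=chunk_size + 1)
        more_results = len(rows) > chunk_size
        yield from rows[:chunk_size]
        if not more_results:
            return
        offset += chunk_size
    # Falling out of the loop means the time budget ran out before the data
    # did (the original logs "monitor_release_adoption.loop_timeout" here).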
Example #14
def build_project_usage_outcomes(start__stop, project):
    start, stop = start__stop

    # XXX(epurkhiser): Tsdb used to use day buckets, where the end would
    # represent a whole day. Snuba queries more accurately, so we must
    # capture the entire last day.
    end = stop + timedelta(days=1)

    query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("outcome"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(
                Column("outcome"), Op.IN,
                [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED]),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("outcome"), Column("category")],
        granularity=Granularity(ONE_DAY),
    )
    data = raw_snql_query(query, referrer="reports.outcomes")["data"]

    return (
        # Accepted errors
        sum(row["total"] for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.ACCEPTED),
        # Dropped errors
        sum(row["total"] for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.RATE_LIMITED),
        # Accepted transactions
        sum(row["total"] for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.ACCEPTED),
        # Dropped transactions
        sum(row["total"] for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.RATE_LIMITED),
    )
Example #15
def run_outcomes_query_timeseries(query: QueryDefinition) -> ResultSet:
    snql_query = Query(
        dataset=query.dataset.value,
        match=Entity(query.match),
        select=query.select_params,
        groupby=query.group_by + [Column(TS_COL)],
        where=query.conditions,
        limit=Limit(10000),
        offset=Offset(0),
        granularity=Granularity(query.rollup),
    )
    result_timeseries = raw_snql_query(snql_query, referrer="outcomes.timeseries")
    return _format_rows(result_timeseries["data"], query)
Example #16
File: metrics.py, Project: KingDEV95/sentry
    def get_changed_project_release_model_adoptions(
        self,
        project_ids: Sequence[ProjectId],
    ) -> Sequence[ProjectRelease]:

        now = datetime.now(pytz.utc)
        start = now - timedelta(days=3)

        projects_ids = list(project_ids)

        if len(projects_ids) == 0:
            return []

        org_id = self._get_org_id(project_ids)
        release_column_name = tag_key(org_id, "release")

        query_cols = [Column("project_id"), Column(release_column_name)]
        group_by = query_cols

        where_clause = [
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id,
                                                            "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, now),
        ]

        query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity("metrics_counters"),
            select=query_cols,
            where=where_clause,
            groupby=group_by,
        )
        result = raw_snql_query(
            query,
            referrer=
            "release_health.metrics.get_changed_project_release_model_adoptions",
            use_cache=False,
        )

        def extract_row_info(
                row: Mapping[str, Union[OrganizationId,
                                        str]]) -> ProjectRelease:
            return row.get("project_id"), reverse_tag_value(
                org_id, row.get(release_column_name))  # type: ignore

        return [extract_row_info(row) for row in result["data"]]
Example #17
    def test_basic(self) -> None:
        now = datetime.now()
        self._insert_event_for_time(now)

        query = (Query(dataset="events", match=Entity("events")).set_select([
            Function("count", [], "count")
        ]).set_groupby([Column("project_id")]).set_where([
            Condition(Column("project_id"), Op.EQ, self.project.id),
            Condition(Column("timestamp"), Op.GTE, now - timedelta(days=1)),
            Condition(Column("timestamp"), Op.LT, now + timedelta(days=1)),
        ]))

        result = snuba.raw_snql_query(query)
        assert len(result["data"]) == 1
        assert result["data"][0] == {"count": 1, "project_id": self.project.id}
Example #18
def _get_project_releases_count(
    organization_id: int,
    project_ids: Sequence[int],
    scope: str,
    stats_period: Optional[str] = None,
    environments: Optional[Sequence[str]] = None,
) -> int:
    """
    Fetches the total count of releases/project combinations
    """
    if stats_period is None:
        stats_period = "24h"

    # Special rule: we only support sorting by the last 24h.
    if scope.endswith("_24h"):
        stats_period = "24h"

    _, stats_start, _ = get_rollup_starts_and_buckets(stats_period)

    where = [
        Condition(Column("started"), Op.GTE, stats_start),
        Condition(Column("started"), Op.LT, datetime.now()),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("org_id"), Op.EQ, organization_id),
    ]
    if environments is not None:
        where.append(Condition(Column("environment"), Op.IN, environments))

    having = []
    # Filter out releases with zero users when sorting by either `users` or `crash_free_users`
    if scope in ["users", "crash_free_users"]:
        having.append(Condition(Column("users"), Op.GT, 0))

    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[
            Function(
                "uniqExact",
                [Column("release"), Column("project_id")],
                alias="count")
        ],
        where=where,
        having=having,
    )
    data = snuba.raw_snql_query(
        query, referrer="snuba.sessions.get_project_releases_count")["data"]
    return data[0]["count"] if data else 0
Example #19
 def data_fn(offset: int, limit: int) -> Any:
     builder = QueryBuilder(
         dataset=Dataset.Discover,
         params=params,
         selected_columns=["spans_op", "count()"],
         array_join="spans_op",
         query=query,
         limit=limit,
         offset=offset,
         orderby="-count",
     )
     snql_query = builder.get_snql_query()
     results = raw_snql_query(snql_query,
                              "api.organization-events-span-ops")
     return [
         SpanOp(op=row["spans_op"], count=row["count"])
         for row in results["data"]
     ]
Example #20
File: metrics.py, Project: KingDEV95/sentry
    def check_releases_have_health_data(
        self,
        organization_id: OrganizationId,
        project_ids: Sequence[ProjectId],
        release_versions: Sequence[ReleaseName],
        start: datetime,
        end: datetime,
    ) -> Set[ReleaseName]:

        release_column_name = tag_key(organization_id, "release")
        releases_ids = [
            release_id for release_id in [
                try_get_string_index(organization_id, release)
                for release in release_versions
            ] if release_id is not None
        ]
        query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity("metrics_counters"),
            select=[Column(release_column_name)],
            where=[
                Condition(Column("org_id"), Op.EQ, organization_id),
                Condition(Column("project_id"), Op.IN, project_ids),
                Condition(Column("metric_id"), Op.EQ,
                          metric_id(organization_id, "session")),
                Condition(Column(release_column_name), Op.IN, releases_ids),
                Condition(Column("timestamp"), Op.GTE, start),
                Condition(Column("timestamp"), Op.LT, end),
            ],
            groupby=[Column(release_column_name)],
        )

        result = raw_snql_query(
            query,
            referrer="release_health.metrics.check_releases_have_health_data",
            use_cache=False,
        )

        def extract_row_info(
                row: Mapping[str, Union[OrganizationId, str]]) -> ReleaseName:
            return reverse_tag_value(
                organization_id, row.get(release_column_name))  # type: ignore

        return {extract_row_info(row) for row in result["data"]}
Example #21
    def test_cache(self):
        """Minimal test to verify if use_cache works"""
        results = snuba.raw_snql_query(
            Query(
                "events",
                Entity("events"),
                select=[Column("event_id")],
                where=[
                    Condition(Column("project_id"), Op.EQ, self.project.id),
                    Condition(Column("timestamp"), Op.GTE,
                              timezone.now() - timedelta(days=1)),
                    Condition(Column("timestamp"), Op.LT, timezone.now()),
                ],
                limit=Limit(1),
            ),
            use_cache=True,
        )

        assert results["data"] == []
Example #22
File: metrics.py, Project: KingDEV95/sentry
    def _get_crash_free_rate_data(
        org_id: int,
        project_ids: Sequence[int],
        start: datetime,
        end: datetime,
        rollup: int,
    ) -> Dict[int, Dict[str, float]]:

        data: Dict[int, Dict[str, float]] = {}

        session_status = tag_key(org_id, "session.status")

        count_query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity(EntityKey.MetricsCounters.value),
            select=[Column("value")],
            where=[
                Condition(Column("org_id"), Op.EQ, org_id),
                Condition(Column("project_id"), Op.IN, project_ids),
                Condition(Column("metric_id"), Op.EQ,
                          metric_id(org_id, "session")),
                Condition(Column("timestamp"), Op.GTE, start),
                Condition(Column("timestamp"), Op.LT, end),
            ],
            groupby=[
                Column("project_id"),
                Column(session_status),
            ],
            granularity=Granularity(rollup),
        )

        count_data = raw_snql_query(
            count_query,
            referrer="release_health.metrics.get_crash_free_data",
            use_cache=False)["data"]

        for row in count_data:
            project_data = data.setdefault(row["project_id"], {})
            tag_value = reverse_tag_value(org_id, row[session_status])
            project_data[tag_value] = row["value"]

        return data
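The returned mapping is keyed by project and by the raw session.status tag value; turning it into a crash-free percentage is left to the caller. A purely arithmetic sketch of that step (the "init" and "crashed" status names follow the convention used in the other examples, and returning None when there is no data is an assumption rather than the library's behaviour):

from typing import Dict, Optional

def crash_free_percentage(project_data: Dict[str, float]) -> Optional[float]:
    # project_data maps session.status values ("init", "crashed", ...) to counts.
    total = project_data.get("init", 0)
    crashed = project_data.get("crashed", 0)
    if total == 0:
        return None  # no sessions at all: the rate is undefined, not 100%
    return 100.0 * (1.0 - crashed / total)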
Example #23
def _get_snuba_query_data(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_key: MetricKey,
    metric_id: int,
    columns: List[SelectableExpression],
    limit_state: _LimitState,
    extra_conditions: Optional[List[Condition]] = None,
) -> Generator[Tuple[MetricKey, _SnubaData], None, None]:
    """Get data from snuba"""

    for query_type in ("totals", "series"):
        snuba_query = _get_snuba_query(
            org_id,
            query,
            entity_key,
            metric_id,
            columns,
            series=query_type == "series",
            limit_state=limit_state,
            extra_conditions=extra_conditions or [],
        )
        referrer = REFERRERS[metric_key][query_type]
        if snuba_query is None:
            query_data = []
        else:
            query_data = raw_snql_query(snuba_query, referrer=referrer)["data"]

        if not query_data:
            # If the first totals query returned empty results,
            # 1. there is no need to query time series,
            # 2. we do not update the LimitState. This gives the next query
            #    the chance to populate the groups.
            #    For example: if the first totals query fetches count_uniq(users),
            #    but a project does not track users at all, we should order by
            #    the results of the second totals query instead.
            break

        assert snuba_query is not None
        limit_state.update(snuba_query.groupby, query_data)
        yield (metric_key, query_data)
Example #24
File: metrics.py, Project: KingDEV95/sentry
        def _count_users(total: bool, referrer: str) -> Dict[Any, int]:
            query = Query(
                dataset=Dataset.Metrics.value,
                match=Entity(EntityKey.MetricsSets.value),
                select=[Column("value")],
                where=_get_common_where(total) + [
                    Condition(Column("metric_id"), Op.EQ,
                              metric_id(org_id, "user")),
                ],
                groupby=_get_common_groupby(total),
            )

            return _convert_results(
                raw_snql_query(
                    query,
                    referrer=referrer,
                    use_cache=False,
                )["data"],
                total=total,
            )
Example #25
 def query_p95(interval):
     start, stop = interval
     query = Query(
         dataset=Dataset.Transactions.value,
         match=Entity("transactions"),
         select=[
             Column("transaction_name"),
             Function("quantile(0.95)", [Column("duration")], "p95"),
         ],
         where=[
             Condition(Column("finish_ts"), Op.GTE, start),
             Condition(Column("finish_ts"), Op.LT,
                       stop + timedelta(days=1)),
             Condition(Column("transaction_name"), Op.IN,
                       transaction_names),
             Condition(Column("project_id"), Op.EQ, project.id),
         ],
         groupby=[Column("transaction_name")],
     )
     return raw_snql_query(query, referrer="reports.key_transactions.p95")
Example #26
def build_key_errors(interval, project):
    start, stop = interval

    # Take the 3 most frequently occurring events
    query = Query(
        dataset=Dataset.Events.value,
        match=Entity("events"),
        select=[Column("group_id"), Function("count", [])],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("group_id")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    query_result = raw_snql_query(query, referrer="reports.key_errors")
    key_errors = query_result["data"]
    return [(e["group_id"], e["count()"]) for e in key_errors]
Example #27
File: metrics.py, Project: KingDEV95/sentry
    def get_oldest_health_data_for_releases(
        self,
        project_releases: Sequence[ProjectRelease],
    ) -> Mapping[ProjectRelease, str]:

        now = datetime.now(pytz.utc)
        start = now - timedelta(days=90)

        project_ids: List[ProjectId] = [x[0] for x in project_releases]
        org_id = self._get_org_id(project_ids)
        release_column_name = tag_key(org_id, "release")
        releases = [x[1] for x in project_releases]
        releases_ids = [
            release_id for release_id in
            [try_get_string_index(org_id, release) for release in releases]
            if release_id is not None
        ]

        query_cols = [
            Column("project_id"),
            Column(release_column_name),
            Function("min", [Column("bucketed_time")], "oldest"),
        ]

        group_by = [
            Column("project_id"),
            Column(release_column_name),
        ]

        where_clause = [
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id,
                                                            "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, now),
            Condition(Column(release_column_name), Op.IN, releases_ids),
        ]

        query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity("metrics_counters"),
            select=query_cols,
            where=where_clause,
            groupby=group_by,
            granularity=Granularity(3600),
        )
        rows = raw_snql_query(
            query,
            referrer=
            "release_health.metrics.get_oldest_health_data_for_releases",
            use_cache=False,
        )["data"]

        result = {}

        for row in rows:
            result[row["project_id"],
                   reverse_tag_value(org_id, row[release_column_name]
                                     )] = row["oldest"]

        return result
Example #28
File: metrics.py, Project: KingDEV95/sentry
    def check_has_health_data(
            self, projects_list: Sequence[ProjectOrRelease]
    ) -> Set[ProjectOrRelease]:
        now = datetime.now(pytz.utc)
        start = now - timedelta(days=3)

        projects_list = list(projects_list)

        if len(projects_list) == 0:
            return set()

        includes_releases = isinstance(projects_list[0], tuple)

        if includes_releases:
            project_ids: List[ProjectId] = [x[0] for x in projects_list
                                            ]  # type: ignore
        else:
            project_ids = projects_list  # type: ignore

        org_id = self._get_org_id(project_ids)

        where_clause = [
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id,
                                                            "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, now),
        ]

        if includes_releases:
            releases = [x[1] for x in projects_list]  # type: ignore
            release_column_name = tag_key(org_id, "release")
            releases_ids = get_tag_values_list(org_id, releases)
            where_clause.append(
                Condition(Column(release_column_name), Op.IN, releases_ids))
            column_names = ["project_id", release_column_name]

        else:
            column_names = ["project_id"]

        def extract_row_info_func(
            include_releases: bool,
        ) -> Callable[[Mapping[str, Union[int, str]]], ProjectOrRelease]:
            def f(row: Mapping[str, Union[int, str]]) -> ProjectOrRelease:
                if include_releases:
                    return row["project_id"], reverse_tag_value(
                        org_id, row.get(release_column_name))  # type: ignore
                else:
                    return row["project_id"]  # type: ignore

            return f

        extract_row_info = extract_row_info_func(includes_releases)

        query_cols = [Column(column_name) for column_name in column_names]
        group_by_clause = query_cols

        query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity(EntityKey.MetricsCounters.value),
            select=query_cols,
            where=where_clause,
            groupby=group_by_clause,
        )

        result = raw_snql_query(
            query,
            referrer="release_health.metrics.check_has_health_data",
            use_cache=False)

        return {extract_row_info(row) for row in result["data"]}
Example #29
File: metrics.py, Project: KingDEV95/sentry
    def get_release_sessions_time_bounds(
        self,
        project_id: ProjectId,
        release: ReleaseName,
        org_id: OrganizationId,
        environments: Optional[Sequence[EnvironmentName]] = None,
    ) -> ReleaseSessionsTimeBounds:
        select: List[SelectableExpression] = [
            Function("min", [Column("timestamp")], "min"),
            Function("max", [Column("timestamp")], "max"),
        ]

        try:
            where: List[Union[BooleanCondition, Condition]] = [
                Condition(Column("org_id"), Op.EQ, org_id),
                Condition(Column("project_id"), Op.EQ, project_id),
                Condition(Column(tag_key(org_id, "release")), Op.EQ,
                          tag_value(org_id, release)),
                Condition(Column("timestamp"), Op.GTE, datetime.min),
                Condition(Column("timestamp"), Op.LT, datetime.now(pytz.utc)),
            ]

            if environments is not None:
                env_filter = get_tag_values_list(org_id, environments)
                if not env_filter:
                    raise MetricIndexNotFound()

                where.append(
                    Condition(Column(tag_key(org_id, "environment")), Op.IN,
                              env_filter))
        except MetricIndexNotFound:
            # Some filter condition can't be constructed and therefore can't be
            # satisfied.
            #
            # Ignore return type because of https://github.com/python/mypy/issues/8533
            return {
                "sessions_lower_bound": None,
                "sessions_upper_bound": None
            }  # type: ignore

        # XXX(markus): We know that this combination of queries is not fully
        # equivalent to the sessions-table based backend. Example:
        #
        # 1. Session sid=x is started with timestamp started=n
        # 2. Same sid=x is updated with new payload with timestamp started=n - 1
        #
        # Old sessions backend would return [n - 1 ; n - 1] as range.
        # New metrics backend would return [n ; n - 1] as range.
        #
        # We don't yet know if this case is relevant. Session's started
        # timestamp shouldn't really change as session status is updated
        # though.

        try:
            # Take care of initial values for session.started by querying the
            # init counter. This should take care of most cases on its own.
            init_sessions_query = Query(
                dataset=Dataset.Metrics.value,
                match=Entity(EntityKey.MetricsCounters.value),
                select=select,
                where=where + [
                    Condition(Column("metric_id"), Op.EQ,
                              metric_id(org_id, "session")),
                    Condition(Column(tag_key(org_id, "session.status")), Op.EQ,
                              tag_value(org_id, "init")),
                ],
            )

            rows = raw_snql_query(
                init_sessions_query,
                referrer=
                "release_health.metrics.get_release_sessions_time_bounds.init_sessions",
                use_cache=False,
            )["data"]
        except MetricIndexNotFound:
            rows = []

        try:
            # Take care of potential timestamp updates by looking at the metric
            # for session duration, which is emitted once a session is closed ("terminal state")
            #
            # There is a testcase checked in that tests specifically for a
            # session update that lowers session.started. We don't know if that
            # testcase matters particularly.
            terminal_sessions_query = Query(
                dataset=Dataset.Metrics.value,
                match=Entity(EntityKey.MetricsDistributions.value),
                select=select,
                where=where + [
                    Condition(Column("metric_id"), Op.EQ,
                              metric_id(org_id, "session.duration")),
                ],
            )
            rows.extend(
                raw_snql_query(
                    terminal_sessions_query,
                    referrer=
                    "release_health.metrics.get_release_sessions_time_bounds.terminal_sessions",
                    use_cache=False,
                )["data"])
        except MetricIndexNotFound:
            pass

        # This check is added because, if no sessions are found, the
        # aggregation query returns both the sessions_lower_bound and the
        # sessions_upper_bound as the `0` timestamp, and we do not want that
        # behaviour by default.
        # P.S. To avoid confusion: the `0` timestamp, which is
        # '1970-01-01 00:00:00', is rendered as '0000-00-00 00:00:00' in the
        # clickhouse shell.
        formatted_unix_start_time = datetime.utcfromtimestamp(0).strftime(
            "%Y-%m-%dT%H:%M:%S+00:00")

        lower_bound: Optional[str] = None
        upper_bound: Optional[str] = None

        for row in rows:
            if set(row.values()) == {formatted_unix_start_time}:
                continue
            if lower_bound is None or row["min"] < lower_bound:
                lower_bound = row["min"]
            if upper_bound is None or row["max"] > upper_bound:
                upper_bound = row["max"]

        if lower_bound is None or upper_bound is None:
            return {
                "sessions_lower_bound": None,
                "sessions_upper_bound": None
            }  # type: ignore

        def iso_format_snuba_datetime(date: str) -> str:
            return datetime.strptime(
                date, "%Y-%m-%dT%H:%M:%S+00:00").isoformat()[:19] + "Z"

        return {  # type: ignore
            "sessions_lower_bound": iso_format_snuba_datetime(lower_bound),
            "sessions_upper_bound": iso_format_snuba_datetime(upper_bound),
        }
Example #30
    def query(
        self,
        projects: Sequence[Project],
        retention_window_start: Optional[datetime],
        group_queryset: QuerySet,
        environments: Sequence[Environment],
        sort_by: str,
        limit: int,
        cursor: Optional[Cursor],
        count_hits: bool,
        paginator_options: Mapping[str, Any],
        search_filters: Sequence[SearchFilter],
        date_from: Optional[datetime],
        date_to: Optional[datetime],
        max_hits=None,
    ) -> CursorResult:

        if not validate_cdc_search_filters(search_filters):
            raise InvalidQueryForExecutor(
                "Search filters invalid for this query executor")

        start, end, retention_date = self.calculate_start_end(
            retention_window_start, search_filters, date_from, date_to)

        if start == retention_date and end == retention_date:
            # Both `start` and `end` must have been trimmed to `retention_date`,
            # so this entire search was against a time range that is outside of
            # retention. We'll return empty results to maintain backwards compatibility
            # with Django search (for now).
            return self.empty_result

        if start >= end:
            # TODO: This maintains backwards compatibility with Django search, but
            # in the future we should find a way to notify the user that their search
            # is invalid.
            return self.empty_result

        e_event = self.entities["event"]
        e_group = self.entities["group"]

        where_conditions = [
            Condition(Column("project_id", e_event), Op.IN,
                      [p.id for p in projects]),
            Condition(Column("timestamp", e_event), Op.GTE, start),
            Condition(Column("timestamp", e_event), Op.LT, end),
        ]
        # TODO: This is still basically only handling status, handle this better once we introduce
        # more conditions.
        for search_filter in search_filters:
            where_conditions.append(
                Condition(Column(search_filter.key.name, e_group), Op.IN,
                          search_filter.value.raw_value))

        if environments:
            # TODO: Should this be handled via filter_keys, once we have a snql compatible version?
            where_conditions.append(
                Condition(Column("environment", e_event), Op.IN,
                          [e.name for e in environments]))

        sort_func = self.aggregation_defs[self.sort_strategies[sort_by]]

        having = []
        if cursor is not None:
            op = Op.GTE if cursor.is_prev else Op.LTE
            having.append(Condition(sort_func, op, cursor.value))

        query = Query(
            "events",
            match=Join([Relationship(e_event, "grouped", e_group)]),
            select=[
                Column("id", e_group),
                replace(sort_func, alias="score"),
            ],
            where=where_conditions,
            groupby=[Column("id", e_group)],
            having=having,
            orderby=[OrderBy(sort_func, direction=Direction.DESC)],
            limit=Limit(limit + 1),
        )

        data = snuba.raw_snql_query(
            query, referrer="search.snuba.cdc_search.query")["data"]

        hits_query = Query(
            "events",
            match=Join([Relationship(e_event, "grouped", e_group)]),
            select=[
                Function("uniq", [Column("id", e_group)], alias="count"),
            ],
            where=where_conditions,
        )
        hits = None
        if count_hits:
            hits = snuba.raw_snql_query(
                hits_query,
                referrer="search.snuba.cdc_search.hits")["data"][0]["count"]

        paginator_results = SequencePaginator(
            [(row["score"], row["g.id"]) for row in data],
            reverse=True,
            **paginator_options,
        ).get_result(limit, cursor, known_hits=hits, max_hits=max_hits)
        # We filter against `group_queryset` here so that we recheck all conditions in Postgres.
        # Since replay between Postgres and Clickhouse can happen, we might get back results that
        # have changed state in Postgres. By rechecking them we guarantee that any returned results
        # have the correct state.
        # TODO: This can result in us returning less than a full page of results, but shouldn't
        # affect cursors. If we want to, we can iterate and query snuba until we manage to get a
        # full page. In practice, this will likely only skip a couple of results at worst, and
        # probably not be noticeable to the user, so holding off for now to reduce complexity.
        groups = group_queryset.in_bulk(paginator_results.results)
        paginator_results.results = [
            groups[k] for k in paginator_results.results if k in groups
        ]
        return paginator_results