Code example #1
File: metrics.py Project: wangjianweiwei/sentry
    def _extract_data(self, entity, data, groups):
        tags = tuple((key, data[key]) for key in sorted(data.keys()) if key.startswith("tags["))

        metric_name = reverse_resolve(data["metric_id"])
        ops = self._ops_by_metric[metric_name]

        tag_data = groups.setdefault(
            tags,
            {
                "totals": {},
            },
        )

        timestamp = data.pop(TS_COL_GROUP, None)
        if timestamp is not None:
            timestamp = parse_snuba_datetime(timestamp)

        for op in ops:
            key = f"{op}({metric_name})"

            field = _OP_TO_FIELD[entity][op].snuba_alias
            value = data[field]
            if field == "percentiles":
                value = value[Percentile[op].value]

            # If this is time series data, add it to the appropriate series.
            # Else, add to totals
            if timestamp is None:
                tag_data["totals"][key] = finite_or_none(value)
            else:
                series = tag_data.setdefault("series", {}).setdefault(
                    key, len(self._intervals) * [_DEFAULT_AGGREGATES[op]]
                )
                series_index = self._timestamp_index[timestamp]
                series[series_index] = finite_or_none(value)
Code example #2
def get_project_release_stats(project_id,
                              release,
                              stat,
                              rollup,
                              start,
                              end,
                              environments=None):
    assert stat in ("users", "sessions")

    filter_keys = {"project_id": [project_id]}
    conditions = [["release", "=", release]]
    if environments is not None:
        conditions.append(["environment", "IN", environments])

    buckets = int((end - start).total_seconds() / rollup)
    stats = _make_stats(start, rollup, buckets, default=None)

    for rv in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=[
                "bucketed_started",
                "release",
                stat,
                stat + "_crashed",
                stat + "_abnormal",
                stat + "_errored",
                "duration_quantiles",
            ],
            groupby=["bucketed_started", "release", "project_id"],
            start=start,
            end=end,
            rollup=rollup,
            conditions=conditions,
            filter_keys=filter_keys,
    )["data"]:
        ts = parse_snuba_datetime(rv["bucketed_started"])
        bucket = int((ts - start).total_seconds() / rollup)
        stats[bucket][1] = {
            stat: rv[stat],
            stat + "_crashed": rv[stat + "_crashed"],
            stat + "_abnormal": rv[stat + "_abnormal"],
            stat + "_errored": rv[stat + "_errored"] - rv[stat + "_crashed"],
            "duration_p50": _convert_duration(rv["duration_quantiles"][0]),
            "duration_p90": _convert_duration(rv["duration_quantiles"][1]),
        }

    for idx, bucket in enumerate(stats):
        if bucket[1] is None:
            stats[idx][1] = {
                stat: 0,
                stat + "_crashed": 0,
                stat + "_abnormal": 0,
                stat + "_errored": 0,
                "duration_p50": None,
                "duration_p90": None,
            }

    return stats
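
A note on the bucket arithmetic in this example: the loop fills a pre-built list of [timestamp, value] pairs, one per rollup interval, and each returned row only needs its bucket index. The following is a minimal sketch of what such a scaffold could look like; make_stats_sketch is an illustrative stand-in, not Sentry's actual _make_stats.

from datetime import datetime, timedelta, timezone


def make_stats_sketch(start, rollup, buckets, default=0):
    # One [unix_timestamp, default] pair per rollup interval, in ascending order.
    return [
        [int((start + timedelta(seconds=rollup * i)).timestamp()), default]
        for i in range(buckets)
    ]


start = datetime(2024, 1, 1, tzinfo=timezone.utc)
stats = make_stats_sketch(start, rollup=3600, buckets=3, default=None)

# A row bucketed at 01:00 lands at index int((ts - start).total_seconds() / 3600) == 1.
ts = start + timedelta(hours=1)
stats[int((ts - start).total_seconds() / 3600)][1] = {"sessions": 42}
print(stats)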
Code example #3
    def _extract_data(self, data, groups):
        tags = tuple(
            (key, data[key]) for key in sorted(data.keys())
            if (key.startswith("tags[") or key in ALLOWED_GROUPBY_COLUMNS))

        tag_data = groups.setdefault(
            tags,
            {
                "totals": {},
                "series": {}
            },
        )

        bucketed_time = data.pop(TS_COL_GROUP, None)
        if bucketed_time is not None:
            bucketed_time = parse_snuba_datetime(bucketed_time)

        # We query the union of the query_definition fields and the fields_in_entities from the
        # QueryBuilder; this is necessary because the latter contains the constituent
        # SingularEntityDerivedMetric instances for each CompositeEntityDerivedMetric.
        for op, metric_name in self._set_of_constituent_queries:
            key = f"{op}({metric_name})" if op else metric_name

            default_null_value = metric_object_factory(
                op, metric_name).generate_default_null_values()

            try:
                value = data[key]
            except KeyError:
                # This can occur for derived metrics that are generated from post-query
                # operations and therefore have no direct mapping to the query results,
                # or for raw metrics that do not exist in ClickHouse yet.
                cleaned_value = default_null_value
            else:
                if op in OPERATIONS_PERCENTILES:
                    value = value[0]
                cleaned_value = finite_or_none(value)

            if bucketed_time is None:
                # Only update the value when the key does not exist yet or it still holds a default
                if key not in tag_data["totals"] or tag_data["totals"][key] == default_null_value:
                    tag_data["totals"][key] = cleaned_value

            if bucketed_time is not None or tag_data["totals"][key] == default_null_value:
                empty_values = len(self._intervals) * [default_null_value]
                series = tag_data["series"].setdefault(key, empty_values)

                if bucketed_time is not None:
                    series_index = self._timestamp_index[bucketed_time]
                    if series[series_index] == default_null_value:
                        series[series_index] = cleaned_value
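
The guarded writes at the end of this example implement an "only overwrite defaults" policy: a totals or series slot seeded with a default null value by one constituent query is filled by the first real value and not clobbered afterwards. A minimal standalone sketch of that pattern; fill_slot and DEFAULT are hypothetical names, not part of the Sentry code.

DEFAULT = None


def fill_slot(slots, key, value, default=DEFAULT):
    # Write only if the slot is missing or still holds the default placeholder.
    if key not in slots or slots[key] == default:
        slots[key] = value


totals = {}
fill_slot(totals, "count(sessions)", DEFAULT)  # placeholder from an empty constituent query
fill_slot(totals, "count(sessions)", 17)       # the first real value fills the placeholder
fill_slot(totals, "count(sessions)", 99)       # ignored: the slot already holds a real value
assert totals["count(sessions)"] == 17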
Code example #4
def build_project_series(start__stop, project):
    start, stop = start__stop
    rollup = ONE_DAY

    resolution, series = tsdb.get_optimal_rollup_series(start, stop, rollup)
    assert resolution == rollup, "resolution does not match requested value"

    clean = partial(clean_series, start, stop, rollup)

    def zerofill_clean(data):
        return clean(zerofill(data, start, stop, rollup, fill_default=0))

    # Note: this section can be removed
    issue_ids = project.group_set.filter(status=GroupStatus.RESOLVED,
                                         resolved_at__gte=start,
                                         resolved_at__lt=stop).values_list(
                                             "id", flat=True)

    # TODO: The TSDB calls could be replaced with a SnQL call here
    tsdb_range_resolved = _query_tsdb_groups_chunked(tsdb.get_range, issue_ids,
                                                     start, stop, rollup)
    resolved_error_series = reduce(
        merge_series,
        map(clean, tsdb_range_resolved.values()),
        clean([(timestamp, 0) for timestamp in series]),
    )
    # end

    # Use outcomes to compute total errors and transactions
    outcomes_query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("time"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(Column("outcome"), Op.EQ, Outcome.ACCEPTED),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("time"), Column("category")],
        granularity=Granularity(rollup),
        orderby=[OrderBy(Column("time"), Direction.ASC)],
    )
    outcome_series = raw_snql_query(outcomes_query,
                                    referrer="reports.outcome_series")
    total_error_series = OrderedDict()
    for v in outcome_series["data"]:
        if v["category"] in DataCategory.error_categories():
            timestamp = int(to_timestamp(parse_snuba_datetime(v["time"])))
            total_error_series[timestamp] = total_error_series.get(
                timestamp, 0) + v["total"]

    total_error_series = zerofill_clean(list(total_error_series.items()))
    transaction_series = [(int(to_timestamp(parse_snuba_datetime(v["time"]))),
                           v["total"]) for v in outcome_series["data"]
                          if v["category"] == DataCategory.TRANSACTION]
    transaction_series = zerofill_clean(transaction_series)

    error_series = merge_series(
        resolved_error_series,
        total_error_series,
        lambda resolved, total:
        (resolved, total - resolved),  # Resolved, Unresolved
    )

    # Format of this series: [(resolved , unresolved, transactions)]
    return merge_series(
        error_series,
        transaction_series,
        lambda errors, transactions: errors + (transactions, ),
    )
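
Both the error series and the final return value above are produced by merge_series, which combines two series with identical timestamps pointwise. A small sketch under the assumption that each series is a list of (timestamp, value) tuples; merge_series_sketch is an illustrative stand-in for the helper imported by the Sentry reports code.

def merge_series_sketch(left, right, combine):
    # Both series must cover the same timestamps in the same order.
    merged = []
    for (ts_l, v_l), (ts_r, v_r) in zip(left, right):
        assert ts_l == ts_r, "series are not aligned"
        merged.append((ts_l, combine(v_l, v_r)))
    return merged


errors = [(0, (1, 3)), (3600, (2, 5))]   # value = (resolved, unresolved)
transactions = [(0, 120), (3600, 95)]
# Mirrors `errors + (transactions,)` above: (resolved, unresolved, transactions).
print(merge_series_sketch(errors, transactions, lambda e, t: e + (t,)))
# -> [(0, (1, 3, 120)), (3600, (2, 5, 95))]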
Code example #5
File: sessions.py Project: georgbez/sentry
def get_project_release_stats(project_id, release, stat, rollup, start, end, environments=None):
    assert stat in ("users", "sessions")

    filter_keys = {"project_id": [project_id]}
    conditions = [["release", "=", release]]
    if environments is not None:
        conditions.append(["environment", "IN", environments])

    buckets = int((end - start).total_seconds() / rollup)
    stats = _make_stats(start, rollup, buckets, default=None)

    totals = {stat: 0, stat + "_crashed": 0, stat + "_abnormal": 0, stat + "_errored": 0}

    for rv in raw_query(
        dataset=Dataset.Sessions,
        selected_columns=[
            "bucketed_started",
            stat,
            stat + "_crashed",
            stat + "_abnormal",
            stat + "_errored",
            "duration_quantiles",
        ],
        groupby=["bucketed_started"],
        start=start,
        end=end,
        rollup=rollup,
        conditions=conditions,
        filter_keys=filter_keys,
    )["data"]:
        ts = parse_snuba_datetime(rv["bucketed_started"])
        bucket = int((ts - start).total_seconds() / rollup)
        stats[bucket][1] = {
            stat: rv[stat],
            stat + "_crashed": rv[stat + "_crashed"],
            stat + "_abnormal": rv[stat + "_abnormal"],
            stat + "_errored": rv[stat + "_errored"] - rv[stat + "_crashed"],
            "duration_p50": _convert_duration(rv["duration_quantiles"][0]),
            "duration_p90": _convert_duration(rv["duration_quantiles"][1]),
        }

        # Session stats we can sum up directly without another query
        # as the data becomes available.
        if stat == "sessions":
            for k in totals:
                totals[k] += rv[k]

    for idx, bucket in enumerate(stats):
        if bucket[1] is None:
            stats[idx][1] = {
                stat: 0,
                stat + "_crashed": 0,
                stat + "_abnormal": 0,
                stat + "_errored": 0,
                "duration_p50": None,
                "duration_p90": None,
            }

    # For users we need a secondary query over the entire time range
    if stat == "users":
        rows = raw_query(
            dataset=Dataset.Sessions,
            selected_columns=["users", "users_crashed", "users_abnormal", "users_errored"],
            start=start,
            end=end,
            conditions=conditions,
            filter_keys=filter_keys,
        )["data"]
        if rows:
            rv = rows[0]
            totals = {
                "users": rv["users"],
                "users_crashed": rv["users_crashed"],
                "users_abnormal": rv["users_abnormal"],
                "users_errored": rv["users_errored"] - rv["users_crashed"],
            }

    return stats, totals
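
Why this example sums session totals inside the bucket loop but issues a secondary, range-wide query for users: session counts are additive across time buckets, while unique-user counts are not, since the same user can show up in several buckets. A toy illustration, with plain Python sets standing in for Snuba's aggregates:

bucket_users = [{"alice", "bob"}, {"bob", "carol"}]   # users seen per hourly bucket
bucket_sessions = [5, 3]                              # sessions per hourly bucket

summed_users = sum(len(u) for u in bucket_users)      # 4 -- double-counts bob
distinct_users = len(set.union(*bucket_users))        # 3 -- what the range-wide query returns
total_sessions = sum(bucket_sessions)                 # 8 -- safe to sum per bucket

print(summed_users, distinct_users, total_sessions)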
Code example #6
File: sessions.py Project: georgbez/sentry
def get_release_health_data_overview(
    project_releases,
    environments=None,
    summary_stats_period=None,
    health_stats_period=None,
    stat=None,
):
    """Checks quickly for which of the given project releases we have
    health data available.  The argument is a tuple of `(project_id, release_name)`
    tuples.  The return value is a set of all the project releases that have health
    data.
    """
    if stat is None:
        stat = "sessions"
    assert stat in ("sessions", "users")

    _, summary_start, _ = get_rollup_starts_and_buckets(summary_stats_period or "24h")
    conditions, filter_keys = _get_conditions_and_filter_keys(project_releases, environments)

    stats_rollup, stats_start, stats_buckets = get_rollup_starts_and_buckets(health_stats_period)

    missing_releases = set(project_releases)
    rv = {}
    for x in raw_query(
        dataset=Dataset.Sessions,
        selected_columns=[
            "release",
            "project_id",
            "duration_quantiles",
            "users",
            "sessions",
            "sessions_errored",
            "sessions_crashed",
            "users_crashed",
        ],
        groupby=["release", "project_id"],
        start=summary_start,
        conditions=conditions,
        filter_keys=filter_keys,
    )["data"]:
        rp = {
            "duration_p50": _convert_duration(x["duration_quantiles"][0]),
            "duration_p90": _convert_duration(x["duration_quantiles"][1]),
            "crash_free_users": (
                100 - x["users_crashed"] / float(x["users"]) * 100 if x["users"] else None
            ),
            "crash_free_sessions": (
                100 - x["sessions_crashed"] / float(x["sessions"]) * 100 if x["sessions"] else None
            ),
            "total_users": x["users"],
            "total_sessions": x["sessions"],
            "sessions_crashed": x["sessions_crashed"],
            "sessions_errored": x["sessions_errored"],
            "has_health_data": True,
        }
        if health_stats_period:
            rp["stats"] = {
                health_stats_period: _make_stats(stats_start, stats_rollup, stats_buckets)
            }
        rv[x["project_id"], x["release"]] = rp
        missing_releases.discard((x["project_id"], x["release"]))

    # Add releases without data points
    if missing_releases:
        # If we're already looking at a 90 day horizon we don't need to
        # fire another query; we can already assume there is no data.
        if summary_stats_period != "90d":
            has_health_data = check_has_health_data(missing_releases)
        else:
            has_health_data = ()
        for key in missing_releases:
            rv[key] = {
                "duration_p50": None,
                "duration_p90": None,
                "crash_free_users": None,
                "crash_free_sessions": None,
                "total_users": 0,
                "total_sessions": 0,
                "sessions_crashed": 0,
                "sessions_errored": 0,
                "has_health_data": key in has_health_data,
            }
            if health_stats_period:
                rv[key]["stats"] = {
                    health_stats_period: _make_stats(stats_start, stats_rollup, stats_buckets)
                }

    # Fill in release adoption
    release_adoption = get_release_adoption(project_releases, environments)
    for key in rv:
        adoption_info = release_adoption.get(key) or {}
        rv[key]["adoption"] = adoption_info.get("adoption")
        rv[key]["total_users_24h"] = adoption_info.get("users_24h")
        rv[key]["total_sessions_24h"] = adoption_info.get("sessions_24h")

    if health_stats_period:
        for x in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=["release", "project_id", "bucketed_started", stat],
            groupby=["release", "project_id", "bucketed_started"],
            rollup=stats_rollup,
            start=stats_start,
            conditions=conditions,
            filter_keys=filter_keys,
        )["data"]:
            time_bucket = int(
                (parse_snuba_datetime(x["bucketed_started"]) - stats_start).total_seconds()
                / stats_rollup
            )
            rv[x["project_id"], x["release"]]["stats"][health_stats_period][time_bucket][1] = x[
                stat
            ]

    return rv
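
The crash-free percentages in this example use the same small formula for users and sessions: 100 minus the crashed share, with None when the denominator is zero. Pulled out into a tiny helper for clarity; crash_free_percent is our name for it, not one that exists in the Sentry code.

def crash_free_percent(crashed, total):
    # 100 minus the crashed share; undefined (None) when there were no users/sessions.
    if not total:
        return None
    return 100 - crashed / float(total) * 100


assert crash_free_percent(0, 200) == 100.0
assert crash_free_percent(5, 200) == 97.5
assert crash_free_percent(3, 0) is None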
Code example #7
def get_project_release_stats(project_id,
                              release,
                              stat,
                              rollup,
                              start,
                              end,
                              environments=None):
    assert stat in ("users", "sessions")

    # Snuba treats the query end as exclusive and we're bucketing to a full hour,
    # so round the end up to the next bucket boundary.
    end = to_datetime(
        (to_timestamp(end) // DATASET_BUCKET + 1) * DATASET_BUCKET)

    filter_keys = {"project_id": [project_id]}
    conditions = [["release", "=", release]]
    if environments is not None:
        conditions.append(["environment", "IN", environments])

    buckets = int((end - start).total_seconds() / rollup)
    stats = _make_stats(start, rollup, buckets, default=None)

    # Due to the nature of the probabilistic data structures some
    # subtractions can become negative.  As such we're making sure a number
    # never goes below zero to avoid confusion.

    totals = {
        stat: 0,
        stat + "_healthy": 0,
        stat + "_crashed": 0,
        stat + "_abnormal": 0,
        stat + "_errored": 0,
    }

    for rv in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=[
                "bucketed_started",
                stat,
                stat + "_crashed",
                stat + "_abnormal",
                stat + "_errored",
                "duration_quantiles",
            ],
            groupby=["bucketed_started"],
            start=start,
            end=end,
            rollup=rollup,
            conditions=conditions,
            filter_keys=filter_keys,
            referrer="sessions.release-stats-details",
    )["data"]:
        ts = parse_snuba_datetime(rv["bucketed_started"])
        bucket = int((ts - start).total_seconds() / rollup)
        stats[bucket][1] = {
            stat: rv[stat],
            stat + "_healthy": max(0, rv[stat] - rv[stat + "_errored"]),
            stat + "_crashed": rv[stat + "_crashed"],
            stat + "_abnormal": rv[stat + "_abnormal"],
            stat + "_errored": max(
                0, rv[stat + "_errored"] - rv[stat + "_crashed"] - rv[stat + "_abnormal"]
            ),
            "duration_p50": _convert_duration(rv["duration_quantiles"][0]),
            "duration_p90": _convert_duration(rv["duration_quantiles"][1]),
        }

        # Session stats we can sum up directly without another query
        # as the data becomes available.
        if stat == "sessions":
            for k in totals:
                totals[k] += stats[bucket][1][k]

    for idx, bucket in enumerate(stats):
        if bucket[1] is None:
            stats[idx][1] = {
                stat: 0,
                stat + "_healthy": 0,
                stat + "_crashed": 0,
                stat + "_abnormal": 0,
                stat + "_errored": 0,
                "duration_p50": None,
                "duration_p90": None,
            }

    # For users we need a secondary query over the entire time range
    if stat == "users":
        rows = raw_query(
            dataset=Dataset.Sessions,
            selected_columns=[
                "users", "users_crashed", "users_abnormal", "users_errored"
            ],
            start=start,
            end=end,
            conditions=conditions,
            filter_keys=filter_keys,
            referrer="sessions.crash-free-breakdown-users",
        )["data"]
        if rows:
            rv = rows[0]
            totals = {
                "users": rv["users"],
                "users_healthy": max(0, rv["users"] - rv["users_errored"]),
                "users_crashed": rv["users_crashed"],
                "users_abnormal": rv["users_abnormal"],
                "users_errored": max(
                    0, rv["users_errored"] - rv["users_crashed"] - rv["users_abnormal"]
                ),
            }

    return stats, totals
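
The end-rounding at the top of this example snaps the requested end to the next full bucket so that Snuba's exclusive end timestamp does not cut off the last, partially elapsed bucket. A small sketch of that integer arithmetic, with to_timestamp/to_datetime approximated by plain datetime conversions and DATASET_BUCKET assumed to be one hour:

from datetime import datetime, timezone

DATASET_BUCKET = 3600  # assumed bucket width in seconds


def round_end_up(end):
    # Integer-divide down to the current bucket, then step one bucket forward.
    rounded = (int(end.timestamp()) // DATASET_BUCKET + 1) * DATASET_BUCKET
    return datetime.fromtimestamp(rounded, tz=timezone.utc)


end = datetime(2024, 1, 1, 10, 25, tzinfo=timezone.utc)
print(round_end_up(end))  # 2024-01-01 11:00:00+00:00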
Code example #8
def _get_release_health_data_overview(
    project_releases,
    environments=None,
    summary_stats_period=None,
    health_stats_period=None,
    stat=None,
    now=None,
):
    """Checks quickly for which of the given project releases we have
    health data available.  The argument is a tuple of `(project_id, release_name)`
    tuples.  The return value is a set of all the project releases that have health
    data.
    """
    if stat is None:
        stat = "sessions"
    assert stat in ("sessions", "users")

    _, summary_start, _ = get_rollup_starts_and_buckets(summary_stats_period
                                                        or "24h",
                                                        now=now)
    conditions, filter_keys = _get_conditions_and_filter_keys(
        project_releases, environments)

    stats_rollup, stats_start, stats_buckets = get_rollup_starts_and_buckets(
        health_stats_period, now=now)

    missing_releases = set(project_releases)
    rv = {}
    for x in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=[
                "release",
                "project_id",
                "duration_quantiles",
                "sessions",
                "sessions_errored",
                "sessions_crashed",
                "sessions_abnormal",
                "users",
                "users_crashed",
            ],
            groupby=["release", "project_id"],
            start=summary_start,
            conditions=conditions,
            filter_keys=filter_keys,
            referrer="sessions.release-overview",
    )["data"]:
        rp = {
            "crash_free_users": (
                100 - x["users_crashed"] / float(x["users"]) * 100 if x["users"] else None
            ),
            "crash_free_sessions": (
                100 - x["sessions_crashed"] / float(x["sessions"]) * 100
                if x["sessions"] else None
            ),
            "total_users": x["users"],
            "total_sessions": x["sessions"],
            "sessions_crashed": x["sessions_crashed"],
            "sessions_errored": max(
                0, x["sessions_errored"] - x["sessions_crashed"] - x["sessions_abnormal"]
            ),
            "has_health_data": True,
        }
        rp.update(extract_duration_quantiles(x))
        if health_stats_period:
            rp["stats"] = {
                health_stats_period:
                _make_stats(stats_start, stats_rollup, stats_buckets)
            }
        rv[x["project_id"], x["release"]] = rp
        missing_releases.discard((x["project_id"], x["release"]))

    # Add releases without data points
    if missing_releases:
        # If we're already looking at a 90 day horizon we don't need to
        # fire another query, we can already assume there is no data.
        if summary_stats_period != "90d":
            has_health_data = release_health.check_has_health_data(
                missing_releases)
        else:
            has_health_data = ()
        for key in missing_releases:
            rv[key] = {
                "duration_p50": None,
                "duration_p90": None,
                "crash_free_users": None,
                "crash_free_sessions": None,
                "total_users": 0,
                "total_sessions": 0,
                "sessions_crashed": 0,
                "sessions_errored": 0,
                "has_health_data": key in has_health_data,
            }
            if health_stats_period:
                rv[key]["stats"] = {
                    health_stats_period:
                    _make_stats(stats_start, stats_rollup, stats_buckets)
                }

    release_adoption = release_health.get_release_adoption(
        project_releases, environments)
    for key in rv:
        adoption_info = release_adoption.get(key) or {}
        rv[key]["adoption"] = adoption_info.get("adoption")
        rv[key]["sessions_adoption"] = adoption_info.get("sessions_adoption")
        rv[key]["total_users_24h"] = adoption_info.get("users_24h")
        rv[key]["total_project_users_24h"] = adoption_info.get(
            "project_users_24h")
        rv[key]["total_sessions_24h"] = adoption_info.get("sessions_24h")
        rv[key]["total_project_sessions_24h"] = adoption_info.get(
            "project_sessions_24h")

    if health_stats_period:
        for x in raw_query(
                dataset=Dataset.Sessions,
                selected_columns=[
                    "release", "project_id", "bucketed_started", stat
                ],
                groupby=["release", "project_id", "bucketed_started"],
                rollup=stats_rollup,
                start=stats_start,
                conditions=conditions,
                filter_keys=filter_keys,
                referrer="sessions.release-stats",
        )["data"]:
            time_bucket = int((parse_snuba_datetime(x["bucketed_started"]) -
                               stats_start).total_seconds() / stats_rollup)
            key = (x["project_id"], x["release"])
            # Sometimes this might return a release we haven't seen yet or it might
            # return a time bucket that did not exist yet at the time of the initial
            # query.  In that case, just skip it.
            if key in rv and time_bucket < len(
                    rv[key]["stats"][health_stats_period]):
                rv[key]["stats"][health_stats_period][time_bucket][1] = x[stat]

    return rv
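
A recurring detail in the last two examples is the max(0, ...) clamp on subtractions: as the comment in example #7 notes, the session columns are backed by probabilistic data structures, so errored minus crashed minus abnormal can occasionally dip below zero, and the clamp keeps the derived counts non-negative. A one-line illustration with made-up numbers:

errored, crashed, abnormal = 10, 7, 5         # approximate counters can overlap
print(max(0, errored - crashed - abnormal))   # prints 0 rather than -2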
Code example #9
File: sessions.py Project: y1024/sentry
def get_release_health_data_overview(project_releases,
                                     environments=None,
                                     stats_period=None):
    """Checks quickly for which of the given project releases we have
    health data available.  The argument is a tuple of `(project_id, release_name)`
    tuples.  The return value is a set of all the project releases that have health
    data.
    """
    def _nan_as_none(val):
        return None if val != val else val

    yesterday = datetime.now(pytz.utc) - timedelta(days=1)
    conditions, filter_keys = _get_conditions_and_filter_keys(
        project_releases, environments)

    if stats_period == "24h":
        stats_rollup = 3600
        stats_start = yesterday
        stats_buckets = 24
    elif stats_period == "14d":
        stats_rollup = 86400
        stats_start = datetime.now(pytz.utc) - timedelta(days=14)
        stats_buckets = 14
    elif not stats_period:
        stats_rollup = None
        stats_start = None
    else:
        raise TypeError("Invalid stats period")

    total_users_24h = {}
    for x in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=["release", "users"],
            groupby=["release", "project_id"],
            start=yesterday,
            conditions=conditions,
            filter_keys=filter_keys,
    )["data"]:
        total_users_24h[x["project_id"]] = x["users"]

    rv = {}
    for x in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=[
                "release",
                "project_id",
                "duration_quantiles",
                "users",
                "sessions",
                "sessions_errored",
                "sessions_crashed",
                "users_crashed",
            ],
            groupby=["release", "project_id"],
            start=yesterday,
            conditions=conditions,
            filter_keys=filter_keys,
    )["data"]:
        total_users = total_users_24h.get(x["project_id"])
        rp = {
            "duration_p50": _nan_as_none(x["duration_quantiles"][0]),
            "duration_p90": _nan_as_none(x["duration_quantiles"][1]),
            "crash_free_users": (
                100 - x["users_crashed"] / float(x["users"]) * 100 if x["users"] else None
            ),
            "crash_free_sessions": (
                100 - x["sessions_crashed"] / float(x["sessions"]) * 100
                if x["sessions"] else None
            ),
            "total_users": x["users"],
            "total_sessions": x["sessions"],
            "sessions_crashed": x["sessions_crashed"],
            "sessions_errored": x["sessions_errored"],
            "adoption": (
                x["users"] / total_users * 100 if total_users and x["users"] else None
            ),
        }
        if stats_period:
            rp["stats"] = {
                stats_period: _make_stats(stats_start, stats_rollup,
                                          stats_buckets)
            }
        rv[x["project_id"], x["release"]] = rp

    if stats_period:
        for x in raw_query(
                dataset=Dataset.Sessions,
                selected_columns=[
                    "release", "project_id", "bucketed_started", "sessions"
                ],
                groupby=["release", "project_id", "bucketed_started"],
                rollup=stats_rollup,
                start=stats_start,
                conditions=conditions,
                filter_keys=filter_keys,
        )["data"]:
            time_bucket = int((parse_snuba_datetime(x["bucketed_started"]) -
                               stats_start).total_seconds() / stats_rollup)
            rv[x["project_id"], x["release"]]["stats"][stats_period][
                time_bucket][1] = x["sessions"]

    return rv
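
The _nan_as_none helper above relies on NaN being the only value that is not equal to itself, so val != val is True exactly for NaN; presumably the duration quantiles can come back as NaN when there is no data, and the helper maps that to None. A quick self-contained check of the trick:

import math


def nan_as_none(val):
    # NaN != NaN, so this keeps every real number and maps NaN to None.
    return None if val != val else val


assert nan_as_none(float("nan")) is None
assert nan_as_none(0.0) == 0.0
assert nan_as_none(math.inf) == math.inf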
Code example #10
def get_release_health_data_overview(project_releases,
                                     environments=None,
                                     summary_stats_period=None,
                                     health_stats_period=None):
    """Checks quickly for which of the given project releases we have
    health data available.  The argument is a tuple of `(project_id, release_name)`
    tuples.  The return value is a set of all the project releases that have health
    data.
    """
    def _nan_as_none(val):
        return None if val != val else val

    _, summary_start, _ = get_rollup_starts_and_buckets(summary_stats_period
                                                        or "24h")
    conditions, filter_keys = _get_conditions_and_filter_keys(
        project_releases, environments)

    stats_rollup, stats_start, stats_buckets = get_rollup_starts_and_buckets(
        health_stats_period)

    total_users = {}
    for x in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=["release", "users"],
            groupby=["release", "project_id"],
            start=summary_start,
            conditions=conditions,
            filter_keys=filter_keys,
    )["data"]:
        total_users[x["project_id"]] = x["users"]

    missing_releases = set(project_releases)
    rv = {}
    for x in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=[
                "release",
                "project_id",
                "duration_quantiles",
                "users",
                "sessions",
                "sessions_errored",
                "sessions_crashed",
                "users_crashed",
            ],
            groupby=["release", "project_id"],
            start=summary_start,
            conditions=conditions,
            filter_keys=filter_keys,
    )["data"]:
        x_total_users = total_users.get(x["project_id"])
        rp = {
            "duration_p50": _nan_as_none(x["duration_quantiles"][0]),
            "duration_p90": _nan_as_none(x["duration_quantiles"][1]),
            "crash_free_users": (
                100 - x["users_crashed"] / float(x["users"]) * 100 if x["users"] else None
            ),
            "crash_free_sessions": (
                100 - x["sessions_crashed"] / float(x["sessions"]) * 100
                if x["sessions"] else None
            ),
            "total_users": x["users"],
            "total_sessions": x["sessions"],
            "sessions_crashed": x["sessions_crashed"],
            "sessions_errored": x["sessions_errored"],
            "adoption": (
                x["users"] / x_total_users * 100 if x_total_users and x["users"] else None
            ),
            "has_health_data": True,
        }
        if health_stats_period:
            rp["stats"] = {
                health_stats_period:
                _make_stats(stats_start, stats_rollup, stats_buckets)
            }
        rv[x["project_id"], x["release"]] = rp
        missing_releases.discard((x["project_id"], x["release"]))

    # Add releases without data points
    if missing_releases:
        has_health_data = check_has_health_data(missing_releases)
        for key in missing_releases:
            rv[key] = {
                "duration_p50": None,
                "duration_p90": None,
                "crash_free_users": None,
                "crash_free_sessions": None,
                "total_users": 0,
                "total_sessions": 0,
                "sessions_crashed": 0,
                "sessions_errored": 0,
                "adoption": None,
                "has_health_data": key in has_health_data,
            }
            if health_stats_period:
                rv[key]["stats"] = {
                    health_stats_period:
                    _make_stats(stats_start, stats_rollup, stats_buckets)
                }

    if health_stats_period:
        for x in raw_query(
                dataset=Dataset.Sessions,
                selected_columns=[
                    "release", "project_id", "bucketed_started", "sessions"
                ],
                groupby=["release", "project_id", "bucketed_started"],
                rollup=stats_rollup,
                start=stats_start,
                conditions=conditions,
                filter_keys=filter_keys,
        )["data"]:
            time_bucket = int((parse_snuba_datetime(x["bucketed_started"]) -
                               stats_start).total_seconds() / stats_rollup)
            rv[x["project_id"], x["release"]]["stats"][health_stats_period][
                time_bucket][1] = x["sessions"]

    return rv