Example #1
def get_incident_event_stats(incident,
                             start=None,
                             end=None,
                             windowed_stats=False):
    """
    Gets event stats for an incident. If start/end are provided, uses that time
    period, otherwise uses the incident start/current_end.
    """
    query_params = build_incident_query_params(incident,
                                               start=start,
                                               end=end,
                                               windowed_stats=windowed_stats)
    aggregations = query_params.pop("aggregations")[0]
    snuba_params = SnubaQueryParams(
        aggregations=[(aggregations[0], aggregations[1], "count")],
        orderby="time",
        groupby=["time"],
        rollup=incident.alert_rule.snuba_query.time_window,
        limit=10000,
        **query_params)

    results = bulk_raw_query([snuba_params],
                             referrer="incidents.get_incident_event_stats")
    return SnubaTSResult(results[0], snuba_params.start, snuba_params.end,
                         snuba_params.rollup)
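For orientation, a minimal usage sketch of the function above. The incident fixture and the `.data` attribute access are assumptions based on how SnubaTSResult is constructed and consumed elsewhere on this page, not part of the original example:

# Hypothetical usage; assumes `incident` has an alert_rule with a snuba_query.
stats = get_incident_event_stats(incident, windowed_stats=True)
for row in stats.data["data"]:  # one row per `rollup`-second bucket
    print(row["time"], row["count"])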
Example #2
def get_incident_stats(incident, windowed_stats=False):
    """
    Returns stats for an incident. This includes unique user count, total event count
    and event stats.
    Note that even though this function accepts a windowed_stats parameter, it only
    affects the live-fetched stats, not the snapshots.
    """
    if windowed_stats and incident.status == IncidentStatus.CLOSED.value:
        # At the moment, snapshots are only ever created with windowed_stats as True
        # so if they send False, we need to do a live calculation below.
        try:
            snapshot = IncidentSnapshot.objects.get(incident=incident)
            event_stats = snapshot.event_stats_snapshot
            return {
                "event_stats": SnubaTSResult(
                    event_stats.snuba_values,
                    event_stats.start,
                    event_stats.end,
                    event_stats.period,
                ),
                "total_events": snapshot.total_events,
                "unique_users": snapshot.unique_users,
            }
        except IncidentSnapshot.DoesNotExist:
            pass

    event_stats = get_incident_event_stats(incident,
                                           windowed_stats=windowed_stats)
    aggregates = get_incident_aggregates(incident)
    return {
        "event_stats": event_stats,
        "total_events": aggregates["count"],
        "unique_users": aggregates["unique_users"],
    }
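Both branches return the same three-key mapping, so callers never need to know whether a snapshot was used. A sketch (the incident object is a placeholder):

stats = get_incident_stats(incident, windowed_stats=True)
stats["total_events"]   # int: total event count
stats["unique_users"]   # int: unique user count
stats["event_stats"]    # SnubaTSResult: bucketed time series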
Example #3
def bulk_get_incident_stats(incidents):
    """
    Returns bulk stats for a list of incidents. This includes unique user count,
    total event count and event stats.
    """
    closed = [i for i in incidents if i.status == IncidentStatus.CLOSED.value]
    incident_stats = {}
    snapshots = IncidentSnapshot.objects.filter(incident__in=closed)
    for snapshot in snapshots:
        event_stats = snapshot.event_stats_snapshot
        incident_stats[snapshot.incident_id] = {
            "event_stats": SnubaTSResult(
                event_stats.snuba_values, event_stats.start, event_stats.end, event_stats.period
            ),
            "total_events": snapshot.total_events,
            "unique_users": snapshot.unique_users,
        }

    to_fetch = [i for i in incidents if i.id not in incident_stats]
    if to_fetch:
        query_params_list = bulk_build_incident_query_params(to_fetch)
        all_event_stats = bulk_get_incident_event_stats(to_fetch, query_params_list)
        all_aggregates = bulk_get_incident_aggregates(query_params_list)
        for incident, event_stats, aggregates in zip(to_fetch, all_event_stats, all_aggregates):
            incident_stats[incident.id] = {
                "event_stats": event_stats,
                "total_events": aggregates["count"],
                "unique_users": aggregates["unique_users"],
            }

    return [incident_stats[incident.id] for incident in incidents]
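Note the final list comprehension: results come back in the same order as the incidents argument, whether a given incident was served from a snapshot or fetched live. Illustrated (the incident objects are placeholders):

all_stats = bulk_get_incident_stats([incident_a, incident_b])
# all_stats[0] belongs to incident_a and all_stats[1] to incident_b,
# even if only one of the two had an IncidentSnapshot.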
Example #4
def timeseries_query(selected_columns,
                     query,
                     params,
                     rollup,
                     reference_event=None,
                     referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema; virtual fields and
    aggregate functions are supported for both selected columns and
    conditions.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    rollup (int) The bucket width in seconds
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    with sentry_sdk.start_span(
            op="discover.discover",
            description="timeseries.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter, _ = get_timeseries_snuba_filter(selected_columns, query,
                                                      params, rollup,
                                                      reference_event)

    with sentry_sdk.start_span(op="discover.discover",
                               description="timeseries.snuba_query"):
        result = raw_query(
            aggregations=snuba_filter.aggregations,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            start=snuba_filter.start,
            end=snuba_filter.end,
            rollup=rollup,
            orderby="time",
            groupby=["time"],
            dataset=Dataset.Discover,
            limit=10000,
            referrer=referrer,
        )

    with sentry_sdk.start_span(
            op="discover.discover",
            description="timeseries.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = zerofill(result["data"], snuba_filter.start, snuba_filter.end,
                          rollup, "time")

        return SnubaTSResult({"data": result}, snuba_filter.start,
                             snuba_filter.end, rollup)
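A hedged example call: the column name, query string, params shape, and referrer are illustrative values inferred from the docstring, not taken from the source:

from datetime import datetime, timedelta

end = datetime.utcnow()
result = timeseries_query(
    selected_columns=["count()"],
    query="event.type:error",
    params={"start": end - timedelta(days=1), "end": end, "project_id": [1]},
    rollup=3600,  # hourly buckets
    referrer="api.example-stats",
)
# Assuming SnubaTSResult exposes its payload as `.data`, result.data["data"]
# is zerofilled: one row per hour, even for empty buckets.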
Example #5
def bulk_get_incident_event_stats(incidents, query_params_list):
    snuba_params_list = [
        SnubaQueryParams(
            aggregations=[
                (
                    query_aggregation_to_snuba[QueryAggregations(incident.aggregation)][0],
                    query_aggregation_to_snuba[QueryAggregations(incident.aggregation)][1],
                    "count",
                )
            ],
            orderby="time",
            groupby=["time"],
            # TODO: When time_window is persisted, switch to using that instead of alert_rule.time_window.
            rollup=(incident.alert_rule.time_window * 60
                    if incident.alert_rule is not None
                    else 60),
            limit=10000,
            **query_param
        )
        for incident, query_param in zip(incidents, query_params_list)
    ]
    results = bulk_raw_query(snuba_params_list,
                             referrer="incidents.get_incident_event_stats")
    return [
        SnubaTSResult(result, snuba_params.start, snuba_params.end,
                      snuba_params.rollup)
        for snuba_params, result in zip(snuba_params_list, results)
    ]
Example #6
def bulk_get_incident_event_stats(incidents,
                                  query_params_list,
                                  data_points=50):
    snuba_params_list = [
        SnubaQueryParams(
            aggregations=[
                (
                    query_aggregation_to_snuba[QueryAggregations(incident.aggregation)][0],
                    query_aggregation_to_snuba[QueryAggregations(incident.aggregation)][1],
                    "count",
                )
            ],
            orderby="time",
            groupby=["time"],
            rollup=max(int(incident.duration.total_seconds() / data_points), 1),
            limit=10000,
            **query_param
        )
        for incident, query_param in zip(incidents, query_params_list)
    ]
    results = bulk_raw_query(snuba_params_list,
                             referrer="incidents.get_incident_event_stats")
    return [
        SnubaTSResult(result, snuba_params.start, snuba_params.end,
                      snuba_params.rollup)
        for snuba_params, result in zip(snuba_params_list, results)
    ]
Example #7
    def get(self, request, organization):
        try:
            snuba_args = self.get_snuba_query_args(request, organization)
        except OrganizationEventsError as exc:
            return Response({'detail': exc.message}, status=400)
        except NoProjects:
            return Response({'data': []})

        interval = parse_stats_period(request.GET.get('interval', '1h'))
        if interval is None:
            interval = timedelta(hours=1)

        rollup = int(interval.total_seconds())

        result = raw_query(
            aggregations=[
                ('count()', '', 'count'),
            ],
            orderby='time',
            groupby=['time'],
            rollup=rollup,
            referrer='api.organization-events-stats',
            limit=10000,
            **snuba_args
        )

        serializer = SnubaTSResultSerializer(organization, None, request.user)
        return Response(
            serializer.serialize(
                SnubaTSResult(result, snuba_args['start'], snuba_args['end'],
                              rollup), ),
            status=200,
        )
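The interval-to-rollup conversion in isolation, assuming parse_stats_period behaves as the handler implies (a timedelta on success, None on failure):

interval = parse_stats_period("15m")    # -> timedelta(minutes=15)
if interval is None:
    interval = timedelta(hours=1)       # same default as the handler above
rollup = int(interval.total_seconds())  # 900, i.e. 15 minute buckets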
Example #8
    def serialize(self, obj, attrs, user):
        incident = obj.incident

        event_stats = None
        if obj.event_stats_snapshot:
            serializer = SnubaTSResultSerializer(
                obj.incident.organization,
                None,
                user,
            )
            event_stats = serializer.serialize(
                SnubaTSResult(
                    obj.event_stats_snapshot.snuba_values,
                    obj.event_stats_snapshot.start,
                    obj.event_stats_snapshot.end,
                    obj.event_stats_snapshot.period,
                ))

        return {
            'id': six.text_type(obj.id),
            'incidentIdentifier': six.text_type(incident.identifier),
            'user': attrs['user'],
            'type': obj.type,
            'value': obj.value,
            'previousValue': obj.previous_value,
            'comment': obj.comment,
            'eventStats': event_stats,
            'dateCreated': obj.date_added,
        }
Example #9
def get_incident_event_stats(incident, start=None, end=None, windowed_stats=False):
    """
    Gets event stats for an incident. If start/end are provided, uses that time
    period, otherwise uses the incident start/current_end.
    """
    query_params = build_incident_query_params(
        incident, start=start, end=end, windowed_stats=windowed_stats
    )
    time_window = incident.alert_rule.snuba_query.time_window
    aggregations = query_params.pop("aggregations")[0]
    snuba_params = [
        SnubaQueryParams(
            aggregations=[(aggregations[0], aggregations[1], "count")],
            orderby="time",
            groupby=["time"],
            rollup=time_window,
            limit=10000,
            **query_params
        )
    ]

    # We want to include the specific buckets for the incident start and closed times,
    # so that there's no need to interpolate to show them on the frontend. If they're
    # cleanly divisible by the `time_window` then there's no need to fetch, since
    # they'll be included in the standard results anyway.
    extra_buckets = []
    if int(to_timestamp(incident.date_started)) % time_window:
        extra_buckets.append(incident.date_started)
    if incident.date_closed and int(to_timestamp(incident.date_closed)) % time_window:
        extra_buckets.append(incident.date_closed.replace(second=0, microsecond=0))

    # We make extra queries to fetch these buckets
    for bucket_start in extra_buckets:
        extra_bucket_query_params = build_incident_query_params(
            incident, start=bucket_start, end=bucket_start + timedelta(seconds=time_window)
        )
        aggregations = extra_bucket_query_params.pop("aggregations")[0]
        snuba_params.append(
            SnubaQueryParams(
                aggregations=[(aggregations[0], aggregations[1], "count")],
                limit=1,
                **extra_bucket_query_params
            )
        )

    results = bulk_raw_query(snuba_params, referrer="incidents.get_incident_event_stats")
    # Once we receive the results, if we requested extra buckets we now need to label
    # them with timestamp data, since the query we ran only returns the count.
    for extra_start, result in zip(extra_buckets, results[1:]):
        result["data"][0]["time"] = int(to_timestamp(extra_start))
    merged_data = list(chain(*[r["data"] for r in results]))
    merged_data.sort(key=lambda row: row["time"])
    results[0]["data"] = merged_data

    return SnubaTSResult(
        results[0], snuba_params[0].start, snuba_params[0].end, snuba_params[0].rollup
    )
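The extra-bucket condition is plain modular arithmetic; a standalone illustration with made-up numbers:

time_window = 600                   # 10 minute rollup
started_ts = 1_600_000_500          # incident start as a unix timestamp
if started_ts % time_window:        # 300: not on a bucket boundary
    # The main query's buckets begin at multiples of 600, so a dedicated
    # limit=1 query is issued for the bucket containing started_ts.
    pass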
Example #10
        def get_event_stats(
            query_columns: Sequence[str],
            query: str,
            params: Dict[str, str],
            rollup: int,
            zerofill_results: bool,
            comparison_delta: Optional[datetime] = None,
        ) -> SnubaTSResult:
            with sentry_sdk.start_span(
                op="discover.discover", description="timeseries.filter_transform"
            ):
                builder = TimeseriesQueryBuilder(
                    Dataset.Discover,
                    params,
                    rollup,
                    query=query,
                    selected_columns=query_columns,
                    functions_acl=["array_join", "percentileArray", "sumArray"],
                )

                span_op_column = builder.resolve_function("array_join(spans_op)")
                span_group_column = builder.resolve_function("array_join(spans_group)")

                # Add spans.op and spans.group to the group by: we need them
                # in the query so the array join optimizer in Snuba takes
                # effect, but the TimeseriesQueryBuilder removes all
                # non-aggregates from the select clause.
                builder.groupby.extend([span_op_column, span_group_column])

                builder.add_conditions(
                    [
                        Condition(
                            Function("tuple", [span_op_column, span_group_column]),
                            Op.IN,
                            Function("tuple", [Function("tuple", [span.op, span.group])]),
                        ),
                    ]
                )

                snql_query = builder.get_snql_query()
                results = raw_snql_query(
                    snql_query, "api.organization-events-spans-performance-stats"
                )

            with sentry_sdk.start_span(
                op="discover.discover", description="timeseries.transform_results"
            ):
                result = discover.zerofill(
                    results["data"],
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )

            return SnubaTSResult({"data": result}, params["start"], params["end"], rollup)
Example #11
def key_transaction_timeseries_query(selected_columns, query, params, rollup,
                                     referrer, queryset):
    """ Given a queryset of KeyTransactions perform a timeseries query

        This function is intended to match the `timeseries_query` function,
        but exists to avoid including conditions as a parameter on that function.

        selected_columns (Sequence[str]) List of public aliases to fetch.
        query (str) Filter query string to create conditions from.
        params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
        rollup (int) The bucket width in seconds
        referrer (str|None) A referrer string to help locate the origin of this query.
        queryset (QuerySet) Filtered QuerySet of KeyTransactions
    """
    with sentry_sdk.start_span(
            op="discover.discover",
            description="kt_timeseries.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter, _ = get_timeseries_snuba_filter(selected_columns, query,
                                                      params, rollup)

    if queryset.exists():
        snuba_filter.conditions.extend(key_transaction_conditions(queryset))

        with sentry_sdk.start_span(op="discover.discover",
                                   description="kt_timeseries.snuba_query"):
            result = raw_query(
                aggregations=snuba_filter.aggregations,
                conditions=snuba_filter.conditions,
                filter_keys=snuba_filter.filter_keys,
                start=snuba_filter.start,
                end=snuba_filter.end,
                rollup=rollup,
                orderby="time",
                groupby=["time"],
                dataset=Dataset.Discover,
                limit=10000,
                referrer=referrer,
            )
    else:
        result = {"data": []}

    with sentry_sdk.start_span(
            op="discover.discover",
            description="kt_timeseries.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = zerofill(result["data"], snuba_filter.start, snuba_filter.end,
                          rollup, "time")

        return SnubaTSResult({"data": result}, snuba_filter.start,
                             snuba_filter.end, rollup)
Example #12
def bulk_get_incident_stats(incidents, windowed_stats=False):
    """
    Returns bulk stats for a list of incidents. This includes unique user count,
    total event count and event stats.
    Note that even though this function accepts a windowed_stats parameter, it only
    affects the live-fetched stats, not the snapshots.
    """
    incident_stats = {}
    if windowed_stats:
        # At the moment, snapshots are only ever created with windowed_stats as True
        # so if they send False, we need to do a live calculation below.
        closed = [
            i for i in incidents if i.status == IncidentStatus.CLOSED.value
        ]
        snapshots = IncidentSnapshot.objects.filter(incident__in=closed)
        for snapshot in snapshots:
            event_stats = snapshot.event_stats_snapshot
            incident_stats[snapshot.incident_id] = {
                "event_stats": SnubaTSResult(
                    event_stats.snuba_values,
                    event_stats.start,
                    event_stats.end,
                    event_stats.period,
                ),
                "total_events": snapshot.total_events,
                "unique_users": snapshot.unique_users,
            }

    to_fetch = [i for i in incidents if i.id not in incident_stats]
    if to_fetch:
        query_params_list = bulk_build_incident_query_params(
            to_fetch, windowed_stats=False)
        if windowed_stats:
            windowed_query_params_list = bulk_build_incident_query_params(
                to_fetch, windowed_stats=True)
            all_event_stats = bulk_get_incident_event_stats(
                to_fetch, windowed_query_params_list)
        else:
            all_event_stats = bulk_get_incident_event_stats(
                to_fetch, query_params_list)
        all_aggregates = bulk_get_incident_aggregates(query_params_list)
        for incident, event_stats, aggregates in zip(to_fetch, all_event_stats,
                                                     all_aggregates):
            incident_stats[incident.id] = {
                "event_stats": event_stats,
                "total_events": aggregates["count"],
                "unique_users": aggregates["unique_users"],
            }

    return [incident_stats[incident.id] for incident in incidents]
Example #13
def get_incident_event_stats(incident, data_points=20):
    kwargs = build_incident_query_params(incident)
    rollup = max(int(incident.duration.total_seconds() / data_points), 1)
    return SnubaTSResult(
        raw_query(
            aggregations=[
                ('count()', '', 'count'),
            ],
            orderby='time',
            groupby=['time'],
            rollup=rollup,
            referrer='incidents.get_incident_event_stats',
            limit=10000,
            **kwargs
        ),
        kwargs['start'],
        kwargs['end'],
        rollup,
    )
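The rollup derivation aims for roughly data_points buckets across the incident's duration; worked numbers:

from datetime import timedelta

duration = timedelta(hours=2)
data_points = 20
rollup = max(int(duration.total_seconds() / data_points), 1)
# 7200 / 20 = 360 second buckets; the max(..., 1) guard prevents a
# zero-second rollup for incidents shorter than data_points seconds.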
Example #14
    def get(self, request, organization):
        try:
            snuba_args = self.get_snuba_query_args(request, organization)
        except OrganizationEventsError as exc:
            return Response({'detail': exc.message}, status=400)
        except NoProjects:
            return Response({'data': []})

        interval = parse_stats_period(request.GET.get('interval', '1h'))
        if interval is None:
            interval = timedelta(hours=1)

        rollup = int(interval.total_seconds())

        y_axis = request.GET.get('yAxis', None)
        if not y_axis or y_axis == 'event_count':
            aggregations = [('count()', '', 'count')]
        elif y_axis == 'user_count':
            aggregations = [
                ('uniq', 'tags[sentry:user]', 'count'),
            ]
            snuba_args['filter_keys']['tags_key'] = ['sentry:user']
        else:
            return Response(
                {'detail': 'Param yAxis value %s not recognized.' % y_axis}, status=400)

        result = raw_query(
            aggregations=aggregations,
            orderby='time',
            groupby=['time'],
            rollup=rollup,
            referrer='api.organization-events-stats',
            limit=10000,
            **snuba_args
        )

        serializer = SnubaTSResultSerializer(organization, None, request.user)
        return Response(
            serializer.serialize(
                SnubaTSResult(result, snuba_args['start'], snuba_args['end'], rollup),
            ),
            status=200,
        )
Example #15
def get_incident_event_stats(incident, start=None, end=None, data_points=50):
    """
    Gets event stats for an incident. If start/end are provided, uses that time
    period, otherwise uses the incident start/current_end.
    """
    kwargs = build_incident_query_params(incident, start=start, end=end)
    rollup = max(int(incident.duration.total_seconds() / data_points), 1)
    return SnubaTSResult(
        raw_query(
            aggregations=[
                ('count()', '', 'count'),
            ],
            orderby='time',
            groupby=['time'],
            rollup=rollup,
            referrer='incidents.get_incident_event_stats',
            limit=10000,
            **kwargs
        ),
        kwargs['start'],
        kwargs['end'],
        rollup,
    )
Example #16
    def get(self, request, organization):
        try:
            snuba_args = self.get_snuba_query_args_legacy(request, organization)
        except OrganizationEventsError as exc:
            return Response({"detail": exc.message}, status=400)
        except NoProjects:
            return Response({"data": []})

        interval = parse_stats_period(request.GET.get("interval", "1h"))
        if interval is None:
            interval = timedelta(hours=1)

        rollup = int(interval.total_seconds())

        y_axis = request.GET.get("yAxis", None)
        if not y_axis or y_axis == "event_count":
            aggregations = [("count()", "", "count")]
        elif y_axis == "user_count":
            aggregations = [("uniq", "tags[sentry:user]", "count")]
            snuba_args["filter_keys"]["tags_key"] = ["sentry:user"]
        else:
            return Response({"detail": "Param yAxis value %s not recognized." % y_axis}, status=400)

        result = raw_query(
            aggregations=aggregations,
            orderby="time",
            groupby=["time"],
            rollup=rollup,
            referrer="api.organization-events-stats",
            limit=10000,
            **snuba_args
        )

        serializer = SnubaTSResultSerializer(organization, None, request.user)
        return Response(
            serializer.serialize(
                SnubaTSResult(result, snuba_args["start"], snuba_args["end"], rollup)
            ),
            status=200,
        )
Example #17
def bulk_get_incident_stats(incidents, prewindow=False):
    """
    Returns bulk stats for a list of incidents. This includes unique user count,
    total event count and event stats.
    Note that even though this function accepts a prewindow parameter, it only
    affects the live-fetched stats, not snapshots that were created using a prewindow.
    """
    closed = [i for i in incidents if i.status == IncidentStatus.CLOSED.value]
    incident_stats = {}
    snapshots = IncidentSnapshot.objects.filter(incident__in=closed)
    for snapshot in snapshots:
        event_stats = snapshot.event_stats_snapshot
        incident_stats[snapshot.incident_id] = {
            "event_stats": SnubaTSResult(
                event_stats.snuba_values,
                event_stats.start,
                event_stats.end,
                event_stats.period,
            ),
            "total_events": snapshot.total_events,
            "unique_users": snapshot.unique_users,
        }

    to_fetch = [i for i in incidents if i.id not in incident_stats]
    if to_fetch:
        query_params_list = bulk_build_incident_query_params(
            to_fetch, prewindow=prewindow)
        all_event_stats = bulk_get_incident_event_stats(
            to_fetch, query_params_list)
        all_aggregates = bulk_get_incident_aggregates(query_params_list)
        for incident, event_stats, aggregates in zip(to_fetch, all_event_stats,
                                                     all_aggregates):
            incident_stats[incident.id] = {
                "event_stats": event_stats,
                "total_events": aggregates["count"],
                "unique_users": aggregates["unique_users"],
            }

    return [incident_stats[incident.id] for incident in incidents]
Example #18
def bulk_get_incident_event_stats(incidents, query_params_list, data_points=50):
    snuba_params_list = [
        SnubaQueryParams(
            aggregations=[
                ('count()', '', 'count'),
            ],
            orderby='time',
            groupby=['time'],
            rollup=max(int(incident.duration.total_seconds() / data_points), 1),
            limit=10000,
            **query_param
        ) for incident, query_param in zip(incidents, query_params_list)
    ]
    results = bulk_raw_query(snuba_params_list, referrer='incidents.get_incident_event_stats')
    return [
        SnubaTSResult(
            result,
            snuba_params.start,
            snuba_params.end,
            snuba_params.rollup,
        )
        for snuba_params, result in zip(snuba_params_list, results)
    ]
Example #19
def bulk_get_incident_event_stats(incidents, query_params_list):
    snuba_params_list = [
        SnubaQueryParams(
            aggregations=[
                (
                    query_aggregation_to_snuba[
                        aggregate_to_query_aggregation[incident.alert_rule.snuba_query.aggregate]][0],
                    query_aggregation_to_snuba[
                        aggregate_to_query_aggregation[incident.alert_rule.snuba_query.aggregate]][1],
                    "count",
                )
            ],
            orderby="time",
            groupby=["time"],
            rollup=incident.alert_rule.snuba_query.time_window,
            limit=10000,
            **query_param
        )
        for incident, query_param in zip(incidents, query_params_list)
    ]
    results = bulk_raw_query(snuba_params_list,
                             referrer="incidents.get_incident_event_stats")
    return [
        SnubaTSResult(result, snuba_params.start, snuba_params.end,
                      snuba_params.rollup)
        for snuba_params, result in zip(snuba_params_list, results)
    ]
Example #20
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                        usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                        this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. Named user_query
                        to avoid conflicting with the query() function.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    top_events = query(
        selected_columns,
        query=user_query,
        params=params,
        orderby=orderby,
        limit=limit,
        referrer=referrer,
    )

    snuba_filter, translated_columns = get_timeseries_snuba_filter(
        timeseries_columns + selected_columns, user_query, params, rollup)

    user_fields = FIELD_ALIASES["user"]["fields"]

    for field in selected_columns:
        # project is handled by filter_keys already
        if field in ["project", "project.id"]:
            continue
        if field == "issue":
            field = FIELD_ALIASES["issue"]["column_alias"]
        values = list({
            event.get(field)
            for event in top_events["data"] if field in event
        })
        if values:
            # timestamp needs special handling, creating a big OR instead
            if field == "timestamp":
                snuba_filter.conditions.append([["timestamp", "=", value]
                                                for value in values])
            # A user field can be any of its field aliases, do an OR across all the user fields
            elif field == "user":
                snuba_filter.conditions.append(
                    [[resolve_column(user_field), "IN", values]
                     for user_field in user_fields])
            elif None in values:
                non_none_values = [
                    value for value in values if value is not None
                ]
                condition = [[["isNull", [resolve_column(field)]], "=", 1]]
                if non_none_values:
                    condition.append(
                        [resolve_column(field), "IN", non_none_values])
                snuba_filter.conditions.append(condition)
            else:
                snuba_filter.conditions.append(
                    [resolve_column(field), "IN", values])

    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"] + snuba_filter.groupby,
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )

    result = transform_results(result, translated_columns, snuba_filter,
                               selected_columns)

    translated_columns["project_id"] = "project"
    translated_groupby = [
        translated_columns.get(groupby, groupby)
        for groupby in snuba_filter.groupby
    ]

    if "user" in selected_columns:
        # Determine user related fields to prune based on what wasn't selected, since transform_results does the same
        for field in user_fields:
            if field not in selected_columns:
                translated_groupby.remove(field)
        translated_groupby.append("user")
    issues = {}
    if "issue" in selected_columns:
        issues = Group.issues_mapping(
            set([event["issue.id"] for event in top_events["data"]]),
            params["project_id"],
            organization,
        )
    # so the result key is consistent
    translated_groupby.sort()

    results = {}
    for row in result["data"]:
        result_key = create_result_key(row, translated_groupby, issues)
        results.setdefault(result_key, {"data": []})["data"].append(row)
    # Using the top events add the order to the results
    for index, item in enumerate(top_events["data"]):
        result_key = create_result_key(item, translated_groupby, issues)
        results[result_key]["order"] = index
    for key, item in six.iteritems(results):
        results[key] = SnubaTSResult(
            {
                "data": zerofill(item["data"], snuba_filter.start,
                                 snuba_filter.end, rollup, "time"),
                "order": item["order"],
            },
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    return results
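The per-key grouping step relies on dict.setdefault; the pattern in isolation, with toy rows standing in for Snuba output:

rows = [
    {"transaction": "/a", "time": 1},
    {"transaction": "/b", "time": 1},
    {"transaction": "/a", "time": 2},
]
series = {}
for row in rows:
    series.setdefault(row["transaction"], {"data": []})["data"].append(row)
# series["/a"]["data"] has two rows; series["/b"]["data"] has one.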
Example #21
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
    top_events=None,
    allow_empty=True,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                    usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                    this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. Named user_query
                    to avoid conflicting with the query() function.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    top_events (dict|None) A dictionary with a 'data' key containing a list of dictionaries that
                    represent the top events matching the query. Useful when you have found
                    the top events earlier and want to save a query.
    """
    if top_events is None:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="top_events.fetch_events"):
            top_events = query(
                selected_columns,
                query=user_query,
                params=params,
                orderby=orderby,
                limit=limit,
                referrer=referrer,
                auto_aggregations=True,
                use_aggregate_conditions=True,
            )

    with sentry_sdk.start_span(
            op="discover.discover",
            description="top_events.filter_transform") as span:
        span.set_data("query", user_query)
        snuba_filter, translated_columns = get_timeseries_snuba_filter(
            list(sorted(set(timeseries_columns + selected_columns))),
            user_query,
            params,
            rollup,
            default_count=False,
        )

        for field in selected_columns:
            # If we have a project field, we need to limit results by project so we don't hit the result limit
            if field in ["project", "project.id"] and top_events["data"]:
                snuba_filter.project_ids = [
                    event["project.id"] for event in top_events["data"]
                ]
                continue
            if field in FIELD_ALIASES:
                field = FIELD_ALIASES[field].alias
            # Note that because orderby shouldn't be an array field, it's not included in the values
            values = list({
                event.get(field)
                for event in top_events["data"]
                if field in event and not isinstance(event.get(field), list)
            })
            if values:
                # timestamp fields need special handling, creating a big OR instead
                if field == "timestamp" or field.startswith("timestamp.to_"):
                    snuba_filter.conditions.append([[field, "=", value]
                                                    for value in sorted(values)
                                                    ])
                elif None in values:
                    non_none_values = [
                        value for value in values if value is not None
                    ]
                    condition = [[["isNull", [resolve_discover_column(field)]],
                                  "=", 1]]
                    if non_none_values:
                        condition.append([
                            resolve_discover_column(field), "IN",
                            non_none_values
                        ])
                    snuba_filter.conditions.append(condition)
                elif field in FIELD_ALIASES:
                    snuba_filter.conditions.append([field, "IN", values])
                else:
                    snuba_filter.conditions.append(
                        [resolve_discover_column(field), "IN", values])

    with sentry_sdk.start_span(op="discover.discover",
                               description="top_events.snuba_query"):
        result = raw_query(
            aggregations=snuba_filter.aggregations,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            selected_columns=snuba_filter.selected_columns,
            start=snuba_filter.start,
            end=snuba_filter.end,
            rollup=rollup,
            orderby=["time"] + snuba_filter.groupby,
            groupby=["time"] + snuba_filter.groupby,
            dataset=Dataset.Discover,
            limit=10000,
            referrer=referrer,
        )

    if not allow_empty and not len(result.get("data", [])):
        return SnubaTSResult(
            {
                "data":
                zerofill([], snuba_filter.start, snuba_filter.end, rollup,
                         "time")
            },
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    with sentry_sdk.start_span(
            op="discover.discover",
            description="top_events.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = transform_data(result, translated_columns, snuba_filter,
                                selected_columns)

        if "project" in selected_columns:
            translated_columns["project_id"] = "project"
        translated_groupby = [
            translated_columns.get(groupby, groupby)
            for groupby in snuba_filter.groupby
        ]

        issues = {}
        if "issue" in selected_columns:
            issues = Group.issues_mapping(
                {event["issue.id"]
                 for event in top_events["data"]},
                params["project_id"],
                organization,
            )
        # so the result key is consistent
        translated_groupby.sort()

        results = {}
        # Using the top events add the order to the results
        for index, item in enumerate(top_events["data"]):
            result_key = create_result_key(item, translated_groupby, issues)
            results[result_key] = {"order": index, "data": []}
        for row in result["data"]:
            result_key = create_result_key(row, translated_groupby, issues)
            if result_key in results:
                results[result_key]["data"].append(row)
            else:
                logger.warning(
                    "discover.top-events.timeseries.key-mismatch",
                    extra={
                        "result_key": result_key,
                        "top_event_keys": list(results.keys())
                    },
                )
        for key, item in results.items():
            results[key] = SnubaTSResult(
                {
                    "data": zerofill(item["data"], snuba_filter.start,
                                     snuba_filter.end, rollup, "time"),
                    "order": item["order"],
                },
                snuba_filter.start,
                snuba_filter.end,
                rollup,
            )

    return results
Example #22
def timeseries_query(selected_columns,
                     query,
                     params,
                     rollup,
                     reference_event=None,
                     referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema; virtual fields and
    aggregate functions are supported for both selected columns and
    conditions.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    rollup (int) The bucket width in seconds
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    snuba_filter = get_filter(query, params)
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    if not snuba_args["start"] and not snuba_args["end"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, _ = resolve_discover_aliases(snuba_args)
    if not snuba_args["aggregations"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    result = raw_query(
        aggregations=snuba_args.get("aggregations"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    result = zerofill(result["data"], snuba_args["start"], snuba_args["end"],
                      rollup, "time")

    return SnubaTSResult(result, snuba_filter.start, snuba_filter.end, rollup)
Example #23
def timeseries_query(selected_columns, query, params, rollup, reference_event=None, referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema; virtual fields and
    aggregate functions are supported for both selected columns and
    conditions.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    rollup (int) The bucket width in seconds
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, _ = transform_deprecated_functions_in_columns(selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery("Cannot get timeseries result without a start and end.")

    snuba_filter.update_with(resolve_field_list(selected_columns, snuba_filter, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, _ = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery("Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures compatibility
    # with other parts of the timeseries endpoint's expectations.
    if len(snuba_filter.aggregations) == 1:
        snuba_filter.aggregations[0][2] = "count"

    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    result = zerofill(result["data"], snuba_filter.start, snuba_filter.end, rollup, "time")

    return SnubaTSResult({"data": result}, snuba_filter.start, snuba_filter.end, rollup)
Example #24
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                        usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                        this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. Named user_query
                        to avoid conflicting with the query() function.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    top_events = query(
        selected_columns,
        query=user_query,
        params=params,
        orderby=orderby,
        limit=limit,
        referrer=referrer,
    )

    snuba_filter, translated_columns = get_timeseries_snuba_filter(
        timeseries_columns + selected_columns, user_query, params, rollup)

    for field in selected_columns:
        # project is handled by filter_keys already
        if field in ["project", "project.id"]:
            continue
        values = list({
            event.get(field)
            for event in top_events["data"] if field in event
        })
        if values and all(value is not None for value in values):
            # timestamp needs special handling, creating a big OR instead
            if field == "timestamp":
                snuba_filter.conditions.append([["timestamp", "=", value]
                                                for value in values])
            else:
                snuba_filter.conditions.append(
                    [resolve_column(field), "IN", values])

    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"] + snuba_filter.groupby,
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )

    result = transform_results(result, translated_columns, snuba_filter)
    issues = {}
    if "issue" in selected_columns:
        issues = Group.issues_mapping(
            set([event["issue.id"] for event in top_events["data"]]),
            params["project_id"],
            organization,
        )

    translated_columns["project_id"] = "project"
    translated_groupby = [
        translated_columns.get(field, field) for field in snuba_filter.groupby
    ]
    # so the result key is consistent
    translated_groupby.sort()

    results = {}
    for row in result["data"]:
        values = []
        for field in translated_groupby:
            if field == "issue.id":
                values.append(issues.get(row["issue.id"], "unknown"))
            else:
                values.append(six.text_type(row.get(field)))
        result_key = ",".join(values)
        results.setdefault(result_key, []).append(row)
    for key, item in six.iteritems(results):
        results[key] = SnubaTSResult(
            {
                "data":
                zerofill(item, snuba_filter.start, snuba_filter.end, rollup,
                         "time")
            },
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    return results
Example #25
def timeseries_query(
    selected_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    referrer: str,
    zerofill_results: bool = True,
    comparison_delta: Optional[timedelta] = None,
    functions_acl: Optional[List[str]] = None,
    use_snql: Optional[bool] = False,
) -> SnubaTSResult:
    """
    High-level API for doing arbitrary user timeseries queries against events.
    This API should match that of sentry.snuba.discover.timeseries_query.
    """
    metrics_compatible = False
    equations, columns = categorize_columns(selected_columns)
    if comparison_delta is None and not equations:
        metrics_compatible = True

    if metrics_compatible:
        try:
            metrics_query = TimeseriesMetricQueryBuilder(
                params,
                rollup,
                query=query,
                selected_columns=columns,
                functions_acl=functions_acl,
            )
            result = metrics_query.run_query(referrer + ".metrics-enhanced")
            result = discover.transform_results(result, metrics_query.function_alias_map, {}, None)
            result["data"] = (
                discover.zerofill(
                    result["data"],
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )
                if zerofill_results
                else result["data"]
            )
            return SnubaTSResult(
                {"data": result["data"], "isMetricsData": True},
                params["start"],
                params["end"],
                rollup,
            )
        # Re-raise InvalidSearchQuery, since the same thing would happen with discover
        except InvalidSearchQuery as error:
            raise error
        # any remaining errors mean we should try again with discover
        except IncompatibleMetricsQuery:
            metrics_compatible = False

    # This isn't a query we can enhance with metrics
    if not metrics_compatible:
        return discover.timeseries_query(
            selected_columns,
            query,
            params,
            rollup,
            referrer,
            zerofill_results,
            comparison_delta,
            functions_acl,
            use_snql,
        )
    return SnubaTSResult()  # unreachable; present to satisfy the declared return type
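Stripped of the query-building detail, the control flow above is "try the metrics dataset, fall back to discover on IncompatibleMetricsQuery, let InvalidSearchQuery propagate". The pattern distilled (the helper names are hypothetical; the exception class is the one the example already imports):

def with_discover_fallback(metrics_fn, discover_fn, *args, **kwargs):
    # InvalidSearchQuery propagates: discover would reject the query too.
    try:
        return metrics_fn(*args, **kwargs)
    except IncompatibleMetricsQuery:
        return discover_fn(*args, **kwargs)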