def test_filter_keys_set(self):
    """Check that ``raw_query`` accepts ``set`` values inside ``filter_keys``."""
    window_start = datetime.now()
    window_end = datetime.now()
    filters = {
        'project_id': {1},
        'logger': {'asdf'},
    }
    count_aggregation = ['count()', '', 'count']

    # There is no assertion: the test passes as long as the call does not raise.
    snuba.raw_query(
        start=window_start,
        end=window_end,
        filter_keys=filters,
        aggregations=[count_aggregation],
    )
def __search_events_snuba(self, request, project):
    """
    Return a paginated response with the project's events of the last 90
    days, newest first, optionally restricted to events whose message
    contains the ``query`` GET parameter (case-insensitive).
    """
    from functools32 import partial
    from sentry.api.paginator import GenericOffsetPaginator
    from sentry.api.serializers.models.event import SnubaEvent
    from sentry.utils.snuba import raw_query

    def fetch_rows(*args, **kwargs):
        # extract 'data' from raw_query result
        return raw_query(*args, **kwargs)['data']

    def serialize_rows(results):
        return serialize([SnubaEvent(row) for row in results], request.user)

    search_term = request.GET.get('query')
    if search_term:
        message_match = ['positionCaseInsensitive', ['message', "'%s'" % (search_term,)]]
        conditions = [[message_match, '!=', 0]]
    else:
        conditions = []

    now = timezone.now()
    data_fn = partial(
        fetch_rows,
        start=now - timedelta(days=90),
        end=now,
        conditions=conditions,
        filter_keys={'project_id': [project.id]},
        selected_columns=SnubaEvent.selected_columns,
        orderby='-timestamp',
        referrer='api.project-events',
    )

    return self.paginate(
        request=request,
        on_results=serialize_rows,
        paginator=GenericOffsetPaginator(data_fn=data_fn),
    )
def get(self, request, organization):
    """
    Return a time series of event counts for the organization.

    The bucket size comes from the ``interval`` GET parameter and falls back
    to one hour when it cannot be parsed. Responds with HTTP 400 on
    ``OrganizationEventsError`` and with an empty ``data`` list on
    ``NoProjects``.
    """
    try:
        snuba_args = self.get_snuba_query_args(request, organization)
    except OrganizationEventsError as exc:
        return Response({'detail': exc.message}, status=400)
    except NoProjects:
        return Response({'data': []})

    requested_interval = parse_stats_period(request.GET.get('interval', '1h'))
    if requested_interval is None:
        interval = timedelta(hours=1)
    else:
        interval = requested_interval
    rollup = int(interval.total_seconds())

    count_aggregation = ('count()', '', 'count')
    result = raw_query(
        aggregations=[count_aggregation],
        orderby='time',
        groupby=['time'],
        rollup=rollup,
        referrer='api.organization-events-stats',
        limit=10000,
        **snuba_args
    )

    serializer = SnubaTSResultSerializer(organization, None, request.user)
    time_series = SnubaTSResult(result, snuba_args['start'], snuba_args['end'], rollup)
    return Response(serializer.serialize(time_series), status=200)
def prev_event_id(self, environments=None):
    """
    Return the ID (as text) of the event preceding this one in the same
    group, or ``None`` when the query reports an error or finds no row.

    Candidates are ordered by descending timestamp and then descending event
    ID; ``environments``, when given, restricts the candidates.
    """
    from sentry.utils import snuba

    not_newer = ['timestamp', '<=', self.timestamp]
    strictly_before = [
        ['timestamp', '<', self.timestamp],
        ['event_id', '<', self.event_id],
    ]
    conditions = [not_newer, strictly_before]
    if environments:
        conditions.append(['environment', 'IN', environments])

    result = snuba.raw_query(
        start=datetime.utcfromtimestamp(0),  # will be clamped to project retention
        end=self.datetime,  # lte current event
        selected_columns=['event_id'],
        conditions=conditions,
        filter_keys={
            'project_id': [self.project_id],
            'issue': [self.group_id],
        },
        orderby=['-timestamp', '-event_id'],
        limit=1,
        referrer='SnubaEvent.prev_event_id',
    )

    if 'error' in result:
        return None

    rows = result['data']
    if len(rows) == 0:
        return None

    return six.text_type(rows[0]['event_id'])
def get(self, request, organization):
    """
    Return a paginated list of the organization's events, newest first.

    Responds with HTTP 400 on ``OrganizationEventsError``. On ``NoProjects``
    an empty page is returned.
    """
    try:
        snuba_args = self.get_snuba_query_args(request, organization)
    except OrganizationEventsError as exc:
        return Response({'detail': exc.message}, status=400)
    except NoProjects:
        # return empty result if org doesn't have projects
        # or user doesn't have access to projects in org
        def data_fn(*args, **kwargs):
            return []
    else:
        def fetch_rows(*args, **kwargs):
            # extract 'data' from raw_query result
            return raw_query(*args, **kwargs)['data']

        data_fn = partial(
            fetch_rows,
            selected_columns=SnubaEvent.selected_columns,
            orderby='-timestamp',
            referrer='api.organization-events',
            **snuba_args
        )

    def serialize_rows(results):
        return serialize([SnubaEvent(row) for row in results], request.user)

    return self.paginate(
        request=request,
        on_results=serialize_rows,
        paginator=GenericOffsetPaginator(data_fn=data_fn),
    )
def test_use_group_id(self):
    """
    With the ``snuba.use_group_id_column`` option enabled, ``issue`` must be
    usable as a filter/groupby key in ``query`` and as a selected column in
    ``raw_query``.
    """
    base_time = datetime.utcnow()
    group = self.create_group()
    self._insert_event_for_time(base_time, group_id=group.id)

    with self.options({'snuba.use_group_id_column': True}):
        # verify filter_keys and aggregation
        counts_by_issue = snuba.query(
            start=base_time - timedelta(days=1),
            end=base_time + timedelta(days=1),
            groupby=['issue'],
            filter_keys={
                'project_id': [self.project.id],
                'issue': [group.id]
            },
        )
        assert counts_by_issue == {group.id: 1}

        # verify raw_query selecting issue row
        rows = snuba.raw_query(
            start=base_time - timedelta(days=1),
            end=base_time + timedelta(days=1),
            selected_columns=['issue', 'timestamp'],
            filter_keys={
                'project_id': [self.project.id],
                'issue': [group.id]
            },
        )['data']
        expected_row = {
            'issue': group.id,
            'timestamp': base_time.strftime('%Y-%m-%dT%H:%M:%S+00:00'),
        }
        assert rows == [expected_row]
def get_oldest_or_latest_event_for_environments(
        ordering, environments=(), issue_id=None, project_id=None):
    """
    Fetch a single event of an issue, chosen by ``ordering``.

    ``ordering.value`` is passed to Snuba as the ``orderby`` clause and the
    query is limited to one row. When ``environments`` is non-empty, only
    events from those environments are considered; an empty or ``None``
    value applies no environment restriction.

    Returns a ``SnubaEvent`` built from the row, or ``None`` when the result
    contains an error or does not hold exactly one row.
    """
    from sentry.utils import snuba
    from sentry.models import SnubaEvent

    conditions = []
    # Truthiness instead of len(): same result for sequences, and a None
    # value no longer raises TypeError.
    if environments:
        conditions.append(['environment', 'IN', environments])

    result = snuba.raw_query(
        start=datetime.utcfromtimestamp(0),
        end=datetime.utcnow(),
        selected_columns=SnubaEvent.selected_columns,
        conditions=conditions,
        filter_keys={
            'issue': [issue_id],
            'project_id': [project_id],
        },
        orderby=ordering.value,
        limit=1,
        referrer="Group.get_latest",
    )

    if 'error' not in result and len(result['data']) == 1:
        return SnubaEvent(result['data'][0])

    return None
def get(self, request, organization):
    """
    Return a paginated list of the organization's events, newest first,
    within the date range given by the GET parameters.

    The optional ``query`` GET parameter restricts results to events whose
    message contains it (case-insensitive). Responds with HTTP 400 when the
    date range or the project IDs are invalid.
    """
    search_term = request.GET.get('query')
    if search_term:
        message_match = ['positionCaseInsensitive', ['message', "'%s'" % (search_term,)]]
        conditions = [[message_match, '!=', 0]]
    else:
        conditions = []

    try:
        start, end = get_date_range_from_params(request.GET)
    except InvalidParams as exc:
        return Response({'detail': exc.message}, status=400)

    try:
        project_ids = self.get_project_ids(request, organization)
    except ValueError:
        return Response({'detail': 'Invalid project ids'}, status=400)

    def fetch_rows(*args, **kwargs):
        # extract 'data' from raw_query result
        return raw_query(*args, **kwargs)['data']

    def serialize_rows(results):
        return serialize([SnubaEvent(row) for row in results], request.user)

    data_fn = partial(
        fetch_rows,
        start=start,
        end=end,
        conditions=conditions,
        filter_keys={'project_id': project_ids},
        selected_columns=SnubaEvent.selected_columns,
        orderby='-timestamp',
    )

    return self.paginate(
        request=request,
        on_results=serialize_rows,
        paginator=GenericOffsetPaginator(data_fn=data_fn),
    )
def get_group_event_filter(self, project_id, group_id, environment_ids, tags, start, end):
    """
    Build an ``event_id__in`` filter for the events of a group that match
    the given tags.

    ``start``/``end`` are clamped to the range from ``self.get_time_range()``
    and default to it when falsy. A tag whose value is a list is matched with
    ``IN``, any other value with ``=``. At most 1000 event IDs (newest first)
    are fetched.

    Returns ``{'event_id__in': <set of ids>}`` or ``None`` when nothing
    matched.
    """
    default_start, default_end = self.get_time_range()
    if start:
        start = max(start, default_start)
    else:
        start = default_start
    if end:
        end = min(end, default_end)
    else:
        end = default_end

    filters = {
        'project_id': [project_id],
        'issue': [group_id],
    }
    if environment_ids:
        filters['environment'] = environment_ids

    conditions = [
        [u'tags[{}]'.format(tag_name), 'IN' if isinstance(tag_val, list) else '=', tag_val]
        for tag_name, tag_val in tags.items()
    ]

    result = snuba.raw_query(
        start,
        end,
        selected_columns=['event_id'],
        conditions=conditions,
        orderby='-timestamp',
        filter_keys=filters,
        limit=1000,
        referrer='tagstore.get_group_event_filter',
    )

    event_id_set = {row['event_id'] for row in result['data']}
    if event_id_set:
        return {'event_id__in': event_id_set}
    return None
def test_shrink_timeframe(self):
    """
    ``shrink_time_window`` narrows the window only when every issue has
    activity bounds, and ``raw_query`` rejects ranges outside a group's
    activity.
    """
    now = datetime.now()
    year_ago = now - timedelta(days=365)
    year_ahead = now + timedelta(days=365)
    full_window = (year_ago, year_ahead)
    hash_a = 'a' * 32
    hash_b = 'b' * 32

    # No issues at all: the window is returned unchanged.
    for no_issues in (None, []):
        assert snuba.shrink_time_window(no_issues, year_ago, year_ahead) == full_window

    group1 = self.create_group()
    group1.first_seen = now - timedelta(hours=1)
    group1.last_seen = now
    group1.save()
    GroupHash.objects.create(project_id=group1.project_id, group=group1, hash=hash_a)

    group2 = self.create_group()
    GroupHash.objects.create(project_id=group2.project_id, group=group2, hash=hash_b)

    # issues is a list like [(gid, pid, [(hash, tombstone_date), ...]), ...]
    group1_issue = (group1.id, group1.project_id, [(hash_a, None)])
    group2_issue = (group2.id, group2.project_id, [(hash_b, None)])

    shrunk_window = (
        now - timedelta(hours=1, minutes=5),
        now + timedelta(minutes=5),
    )
    assert snuba.shrink_time_window([group1_issue], year_ago, year_ahead) == \
        shrunk_window

    assert snuba.shrink_time_window(
        [group1_issue, group2_issue], year_ago, year_ahead) == full_window

    with pytest.raises(snuba.QueryOutsideGroupActivityError):
        # query a group for a time range before it had any activity
        snuba.raw_query(
            start=group1.first_seen - timedelta(days=1, hours=1),
            end=group1.first_seen - timedelta(days=1),
            filter_keys={
                'project_id': [group1.project_id],
                'issue': [group1.id],
            },
            aggregations=[
                ['count()', '', 'count'],
            ],
        )
def do_query(self, projects, **kwargs):
    """
    Run a Discover query through ``snuba.raw_query``, translating the
    ``project_name`` column to ``project_id`` for the query and back to
    ``project_name`` in the result.

    ``projects`` is indexed by each result row's ``project_id`` to obtain the
    value stored under ``project_name``.

    ``kwargs`` are forwarded to ``snuba.raw_query`` and must contain
    ``selected_columns``, ``groupby`` and ``aggregations``. The lists passed
    in are modified in place while the query is rewritten.

    Returns the ``raw_query`` result with every ``meta`` entry reduced to its
    ``name`` key.
    """
    # Untouched copy of the request, used to decide how to post-process.
    requested_query = deepcopy(kwargs)

    selected_columns = kwargs['selected_columns']
    groupby_columns = kwargs['groupby']

    # Replace project_name with project_id in the outgoing query.
    if 'project_name' in requested_query['selected_columns']:
        selected_columns.remove('project_name')
        if 'project_id' not in selected_columns:
            selected_columns.append('project_id')

    if 'project_name' in requested_query['groupby']:
        groupby_columns.remove('project_name')
        if 'project_id' not in groupby_columns:
            groupby_columns.append('project_id')

    for aggregation in kwargs['aggregations']:
        if aggregation[1] == 'project_name':
            aggregation[1] = 'project_id'

    snuba_results = snuba.raw_query(
        referrer='discover',
        **kwargs
    )

    # Restore project_name in meta and rows; drop project_id when it was
    # only added for the translation.
    if 'project_name' in requested_query['selected_columns']:
        project_name_index = requested_query['selected_columns'].index('project_name')
        snuba_results['meta'].insert(project_name_index, {'name': 'project_name'})
        if 'project_id' not in requested_query['selected_columns']:
            snuba_results['meta'] = [
                field for field in snuba_results['meta'] if field['name'] != 'project_id'
            ]

        for result in snuba_results['data']:
            result['project_name'] = projects[result['project_id']]
            if 'project_id' not in requested_query['selected_columns']:
                del result['project_id']

    # NOTE(review): if 'project_name' is requested in both selected_columns
    # and groupby, this second pass appears to insert a duplicate meta entry
    # and may read 'project_id' from rows where it was already deleted
    # above -- confirm whether that combination can occur.
    if 'project_name' in requested_query['groupby']:
        project_name_index = requested_query['groupby'].index('project_name')
        snuba_results['meta'].insert(project_name_index, {'name': 'project_name'})
        if 'project_id' not in requested_query['groupby']:
            snuba_results['meta'] = [
                field for field in snuba_results['meta'] if field['name'] != 'project_id'
            ]

        for result in snuba_results['data']:
            result['project_name'] = projects[result['project_id']]
            if 'project_id' not in requested_query['groupby']:
                del result['project_id']

    # Only return the meta property "name"
    snuba_results['meta'] = [{'name': field['name']} for field in snuba_results['meta']]

    return snuba_results
def do_query(self, start, end, groupby, **kwargs):
    """
    Forward the query to ``snuba.raw_query`` with the ``discover`` referrer
    and return its result unchanged.
    """
    return snuba.raw_query(
        start=start,
        end=end,
        groupby=groupby,
        referrer='discover',
        **kwargs
    )
def get_incident_aggregates(incident):
    """
    Return the first result row of an aggregate query for ``incident``,
    holding the event ``count`` and the number of ``unique_users``.
    """
    query_params = build_incident_query_params(incident)
    aggregations = [
        ('count()', '', 'count'),
        ('uniq', 'tags[sentry:user]', 'unique_users'),
    ]
    result = raw_query(
        aggregations=aggregations,
        referrer='incidents.get_incident_aggregates',
        limit=10000,
        **query_params
    )
    return result['data'][0]
def test_shrink_timeframe(self):
    """
    ``shrink_time_window`` moves the start forward only when every issue has
    a known first-seen time, and ``raw_query`` rejects ranges outside a
    group's activity.
    """
    now = datetime.now()
    year_ago = now - timedelta(days=365)

    # No issues at all: the start is returned unchanged.
    for no_issues in (None, []):
        assert snuba.shrink_time_window(no_issues, year_ago) == year_ago

    group1 = self.create_group()
    group1.first_seen = now - timedelta(hours=1)
    group1.last_seen = now
    group1.save()
    GroupHash.objects.create(project_id=group1.project_id, group=group1, hash='a' * 32)

    group2 = self.create_group()
    GroupHash.objects.create(project_id=group2.project_id, group=group2, hash='b' * 32)

    shrunk_start = now - timedelta(hours=1, minutes=5)
    assert snuba.shrink_time_window([group1.id], year_ago) == \
        shrunk_start

    assert snuba.shrink_time_window([group1.id, group2.id], year_ago) == year_ago

    with pytest.raises(snuba.QueryOutsideGroupActivityError):
        # query a group for a time range before it had any activity
        snuba.raw_query(
            start=group1.first_seen - timedelta(days=1, hours=1),
            end=group1.first_seen - timedelta(days=1),
            filter_keys={
                'project_id': [group1.project_id],
                'issue': [group1.id],
            },
            aggregations=[
                ['count()', '', 'count'],
            ],
        )
def get_event(cls, project_id, event_id, snuba_cols=selected_columns):
    """
    Look up a single event by project and event ID.

    ``snuba_cols`` selects the columns to fetch. Returns a ``SnubaEvent``
    built from the row, or ``None`` when the result contains an error or
    does not hold exactly one row.
    """
    from sentry.utils import snuba

    result = snuba.raw_query(
        start=datetime.utcfromtimestamp(0),  # will be clamped to project retention
        end=datetime.utcnow(),  # will be clamped to project retention
        selected_columns=snuba_cols,
        filter_keys={
            'event_id': [event_id],
            'project_id': [project_id],
        },
        referrer='SnubaEvent.get_event',
    )

    if 'error' in result:
        return None

    rows = result['data']
    if len(rows) != 1:
        return None

    return SnubaEvent(rows[0])
def _get_events_snuba(self, request, group, environments, query, tags, start, end):
    """
    Return a paginated response with the group's events, newest first.

    The time range defaults to the last 90 days when ``start``/``end`` are
    not given. A direct hit found by ``get_direct_hit_response`` is returned
    immediately. The ``full`` GET parameter switches to the full event
    serializer. ``tags`` is accepted but not used here.

    Raises ``GroupEventsError`` when the query uses boolean terms and the
    ``organizations:boolean-search`` feature is not enabled.
    """
    default_end = timezone.now()
    default_start = default_end - timedelta(days=90)
    params = {
        'issue.id': [group.id],
        'project_id': [group.project_id],
        'start': start or default_start,
        'end': end or default_end,
    }

    direct_hit_resp = get_direct_hit_response(request, query, params, 'api.group-events')
    if direct_hit_resp:
        return direct_hit_resp

    if environments:
        params['environment'] = [env.name for env in environments]

    full = request.GET.get('full', False)
    snuba_args = get_snuba_query_args(request.GET.get('query', None), params)

    # TODO(lb): remove once boolean search is fully functional
    if snuba_args:
        boolean_search_enabled = features.has(
            'organizations:boolean-search',
            group.project.organization,
            actor=request.user
        )
        uses_boolean_terms = snuba_args.pop('has_boolean_terms', False)
        if uses_boolean_terms and not boolean_search_enabled:
            raise GroupEventsError(
                'Boolean search operator OR and AND not allowed in this search.')

    if full:
        snuba_cols = SnubaEvent.minimal_columns
    else:
        snuba_cols = SnubaEvent.selected_columns

    def fetch_rows(*args, **kwargs):
        # extract 'data' from raw_query result
        return raw_query(*args, **kwargs)['data']

    data_fn = partial(
        fetch_rows,
        selected_columns=snuba_cols,
        orderby='-timestamp',
        referrer='api.group-events',
        **snuba_args
    )

    if full:
        serializer = EventSerializer()
    else:
        serializer = SimpleEventSerializer()

    def serialize_rows(results):
        return serialize([SnubaEvent(row) for row in results], request.user, serializer)

    return self.paginate(
        request=request,
        on_results=serialize_rows,
        paginator=GenericOffsetPaginator(data_fn=data_fn),
    )
def get(self, request, organization):
    """
    Return a paginated list of the organization's events, newest first.

    A direct hit on an event ID is returned immediately when found. The
    ``full`` GET parameter switches to the full event serializer. Responds
    with HTTP 400 on ``OrganizationEventsError`` and with an empty page on
    ``NoProjects``.
    """
    # Check for a direct hit on event ID
    query = request.GET.get('query', '').strip()
    direct_hit_resp = None
    try:
        direct_hit_resp = get_direct_hit_response(
            request,
            query,
            self.get_filter_params(request, organization),
            'api.organization-events'
        )
    except (OrganizationEventsError, NoProjects):
        # Ignored here; the regular query path below does its own handling
        # of these exception types.
        pass

    if direct_hit_resp:
        return direct_hit_resp

    full = request.GET.get('full', False)

    try:
        snuba_args = self.get_snuba_query_args(request, organization)
    except OrganizationEventsError as exc:
        return Response({'detail': exc.message}, status=400)
    except NoProjects:
        # return empty result if org doesn't have projects
        # or user doesn't have access to projects in org
        def data_fn(*args, **kwargs):
            return []
    else:
        if full:
            snuba_cols = SnubaEvent.minimal_columns
        else:
            snuba_cols = SnubaEvent.selected_columns

        def fetch_rows(*args, **kwargs):
            # extract 'data' from raw_query result
            return raw_query(*args, **kwargs)['data']

        data_fn = partial(
            fetch_rows,
            selected_columns=snuba_cols,
            orderby='-timestamp',
            referrer='api.organization-events',
            **snuba_args
        )

    if full:
        serializer = EventSerializer()
    else:
        serializer = SimpleEventSerializer()

    def serialize_rows(results):
        return serialize([SnubaEvent(row) for row in results], request.user, serializer)

    return self.paginate(
        request=request,
        on_results=serialize_rows,
        paginator=GenericOffsetPaginator(data_fn=data_fn),
    )
def get_incident_event_stats(incident, data_points=20):
    """
    Return a ``SnubaTSResult`` with event counts over the incident.

    The bucket size is the incident duration divided by ``data_points``,
    truncated to whole seconds and never smaller than one second.
    """
    query_params = build_incident_query_params(incident)
    seconds_per_point = int(incident.duration.total_seconds() / data_points)
    rollup = max(seconds_per_point, 1)

    result = raw_query(
        aggregations=[
            ('count()', '', 'count'),
        ],
        orderby='time',
        groupby=['time'],
        rollup=rollup,
        referrer='incidents.get_incident_event_stats',
        limit=10000,
        **query_params
    )

    return SnubaTSResult(result, query_params['start'], query_params['end'], rollup)
def get_group_event_filter(self, project_id, group_id, environment_id, tags):
    """
    Build an ``event_id__in`` filter for the events of a group, in one
    environment, whose tags equal the given values.

    The time range comes from ``self.get_time_range()`` and at most 1000
    event IDs (newest first) are fetched.

    Returns ``{'event_id__in': <set of ids>}`` or ``None`` when nothing
    matched.
    """
    start, end = self.get_time_range()

    filters = {
        'project_id': [project_id],
        'environment': [environment_id],
        'issue': [group_id],
    }

    conditions = []
    for tag_name, tag_value in tags.items():
        conditions.append([u'tags[{}]'.format(tag_name), '=', tag_value])

    result = snuba.raw_query(
        start,
        end,
        selected_columns=['event_id'],
        conditions=conditions,
        orderby='-timestamp',
        filter_keys=filters,
        limit=1000,
        referrer='tagstore.get_group_event_filter',
    )

    event_id_set = {row['event_id'] for row in result['data']}
    if event_id_set:
        return {'event_id__in': event_id_set}
    return None
def get(self, request, organization):
    """
    Return an approximate count of the organization's events.

    The count is obtained from a ``turbo`` query and scaled by 10. Responds
    with HTTP 400 on ``OrganizationEventsError`` and with a count of 0 on
    ``NoProjects``.
    """
    try:
        snuba_args = self.get_snuba_query_args(request, organization)
    except OrganizationEventsError as exc:
        return Response({'detail': exc.message}, status=400)
    except NoProjects:
        return Response({'count': 0})

    result = raw_query(
        aggregations=[['count()', '', 'count']],
        referrer='api.organization-event-meta',
        turbo=True,
        **snuba_args
    )
    data = result['data'][0]

    # this needs to be multiplied to account for the `TURBO_SAMPLE_RATE`
    # in snuba
    scaled_count = data['count'] * 10
    return Response({'count': scaled_count})
def get_direct_hit_response(request, query, snuba_params, referrer):
    """
    Return a response for a query that directly identifies one event.

    When ``query`` is an event ID and exactly one matching event is found,
    the serialized event is returned in a response carrying the
    ``X-Sentry-Direct-Hit`` header. In every other case ``None`` is returned.
    """
    if not is_event_id(query):
        return None

    snuba_args = get_snuba_query_args(
        query=u'id:{}'.format(query),
        params=snuba_params)

    results = raw_query(
        selected_columns=SnubaEvent.selected_columns,
        referrer=referrer,
        **snuba_args
    )['data']

    if len(results) != 1:
        return None

    events = [SnubaEvent(row) for row in results]
    response = Response(serialize(events, request.user))
    response['X-Sentry-Direct-Hit'] = '1'
    return response
def calculate_incident_start(query, projects, groups):
    """
    Attempts to automatically calculate the date that an incident began at based
    on the events related to the incident.

    The events are counted per time bucket (bucket size
    ``INCIDENT_START_ROLLUP``) over the ``INCIDENT_START_PERIOD`` ending at
    the latest ``last_seen`` of ``groups`` plus one second, or at the current
    time when no groups are given. The buckets are then scanned from newest
    to oldest looking for the highest spike, with older spikes weighted less.

    Returns the estimated start datetime, or the current time when no spike
    start could be determined.
    """
    params = {}
    if groups:
        params["issue.id"] = [g.id for g in groups]
        end = max(g.last_seen for g in groups) + timedelta(seconds=1)
    else:
        end = timezone.now()

    params["start"] = end - INCIDENT_START_PERIOD
    params["end"] = end

    if projects:
        params["project_id"] = [p.id for p in projects]

    query_args = get_snuba_query_args(query, params)
    rollup = int(INCIDENT_START_ROLLUP.total_seconds())

    result = raw_query(aggregations=[("count()", "", "count"), ("min", "timestamp", "first_seen")],
                       orderby="time",
                       groupby=["time"],
                       rollup=rollup,
                       referrer="incidents.calculate_incident_start",
                       limit=10000,
                       **query_args)["data"]
    # TODO: Start could be the period before the first period we find
    result = zerofill(result, params["start"], params["end"], rollup, "time")

    # We want to linearly scale scores from 100% value at the most recent to
    # 50% at the oldest. This gives a bias towards newer results.
    # NOTE(review): this division raises ZeroDivisionError if `result` is
    # empty -- confirm that zerofill always returns at least one row.
    negative_weight = (1.0 / len(result)) / 2
    multiplier = 1.0
    cur_spike_max_count = -1
    cur_spike_start = None
    cur_spike_end = None
    max_height = 0
    incident_start = None
    cur_height = 0
    prev_count = 0

    def get_row_first_seen(row, default=None):
        # Parse the row's "first_seen" as a UTC datetime; rows without the
        # key yield `default`.
        first_seen = default
        if "first_seen" in row:
            first_seen = parse_date(row["first_seen"]).replace(tzinfo=pytz.utc)
        return first_seen

    def calculate_start(spike_start, spike_end):
        """
        We arbitrarily choose a date about 1/3 into the incident period. We
        could potentially improve this if we want by analyzing the period in
        more detail and choosing a date that most closely fits with being 1/3
        up the spike.
        """
        spike_length = spike_end - spike_start
        return spike_start + (spike_length / 3)

    # Walk the buckets from newest to oldest.
    for row in reversed(result):
        cur_count = row.get("count", 0)
        if cur_count < prev_count or cur_count > 0 and cur_count == prev_count:
            cur_height = cur_spike_max_count - cur_count
        elif cur_count > 0 or prev_count > 0 or cur_height > 0:
            # Now we've got the height of the current spike, compare it to the
            # current max. We decrease the value by `multiplier` so that we
            # favour newer results
            cur_height *= multiplier
            if cur_height > max_height:
                # If we detect that we have a new highest peak, then set a new
                # incident start date
                incident_start = calculate_start(cur_spike_start, cur_spike_end)
                max_height = cur_height

            cur_height = 0
            cur_spike_max_count = cur_count
            cur_spike_end = get_row_first_seen(row)

        # We attempt to get the first_seen value from the row here. If the row
        # doesn't have it (because it's a zerofilled row), then just use the
        # previous value. This allows us to have the start of a spike always be
        # a bucket that contains at least one element.
        cur_spike_start = get_row_first_seen(row, cur_spike_start)
        prev_count = cur_count
        multiplier -= negative_weight

    # Account for a spike that was still open when the scan ended.
    if (cur_height > max_height or not incident_start) and cur_spike_start:
        incident_start = calculate_start(cur_spike_start, cur_spike_end)

    if not incident_start:
        incident_start = timezone.now()

    return incident_start
def get_release_health_data_overview(
    project_releases,
    environments=None,
    summary_stats_period=None,
    health_stats_period=None,
    stat=None,
):
    """Builds a health overview for each of the given project releases.

    `project_releases` is a collection of `(project_id, release_name)`
    tuples.  `summary_stats_period` defaults to `"24h"`, and `stat` must be
    `"sessions"` (the default) or `"users"`.

    The return value is a dict keyed by `(project_id, release_name)`.  Each
    value holds duration percentiles, crash free rates, totals, a
    `has_health_data` flag and adoption information.  When
    `health_stats_period` is given, a `stats` entry with the bucketed values
    of `stat` is included as well.  Releases for which the summary query
    returned no row are reported with empty values.
    """
    if stat is None:
        stat = "sessions"
    assert stat in ("sessions", "users")

    _, summary_start, _ = get_rollup_starts_and_buckets(summary_stats_period or "24h")
    conditions, filter_keys = _get_conditions_and_filter_keys(
        project_releases, environments)

    stats_rollup, stats_start, stats_buckets = get_rollup_starts_and_buckets(
        health_stats_period)

    # Releases still in this set after the summary query have no rows.
    missing_releases = set(project_releases)
    rv = {}
    for x in raw_query(
        dataset=Dataset.Sessions,
        selected_columns=[
            "release",
            "project_id",
            "duration_quantiles",
            "users",
            "sessions",
            "sessions_errored",
            "sessions_crashed",
            "users_crashed",
        ],
        groupby=["release", "project_id"],
        start=summary_start,
        conditions=conditions,
        filter_keys=filter_keys,
    )["data"]:
        rp = {
            "duration_p50": _convert_duration(x["duration_quantiles"][0]),
            "duration_p90": _convert_duration(x["duration_quantiles"][1]),
            # Rates are None when the denominator is zero.
            "crash_free_users": (100 - x["users_crashed"] / float(x["users"]) * 100
                                 if x["users"] else None),
            "crash_free_sessions": (100 - x["sessions_crashed"] / float(x["sessions"]) * 100
                                    if x["sessions"] else None),
            "total_users": x["users"],
            "total_sessions": x["sessions"],
            "sessions_crashed": x["sessions_crashed"],
            "sessions_errored": x["sessions_errored"],
            "has_health_data": True,
        }
        if health_stats_period:
            rp["stats"] = {
                health_stats_period: _make_stats(stats_start, stats_rollup, stats_buckets)
            }
        rv[x["project_id"], x["release"]] = rp
        missing_releases.discard((x["project_id"], x["release"]))

    # Add releases without data points
    if missing_releases:
        # If we're already looking at a 90 day horizon we don't need to
        # fire another query, we can already assume there is no data.
        if summary_stats_period != "90d":
            has_health_data = check_has_health_data(missing_releases)
        else:
            has_health_data = ()
        for key in missing_releases:
            rv[key] = {
                "duration_p50": None,
                "duration_p90": None,
                "crash_free_users": None,
                "crash_free_sessions": None,
                "total_users": 0,
                "total_sessions": 0,
                "sessions_crashed": 0,
                "sessions_errored": 0,
                "has_health_data": key in has_health_data,
            }
            if health_stats_period:
                rv[key]["stats"] = {
                    health_stats_period: _make_stats(stats_start, stats_rollup, stats_buckets)
                }

    # Fill in release adoption
    release_adoption = get_release_adoption(project_releases, environments)
    for key in rv:
        adoption_info = release_adoption.get(key) or {}
        rv[key]["adoption"] = adoption_info.get("adoption")
        rv[key]["total_users_24h"] = adoption_info.get("users_24h")
        rv[key]["total_sessions_24h"] = adoption_info.get("sessions_24h")

    # Write the bucketed values of `stat` into the prepared stats lists.
    if health_stats_period:
        for x in raw_query(
            dataset=Dataset.Sessions,
            selected_columns=[
                "release", "project_id", "bucketed_started", stat
            ],
            groupby=["release", "project_id", "bucketed_started"],
            rollup=stats_rollup,
            start=stats_start,
            conditions=conditions,
            filter_keys=filter_keys,
        )["data"]:
            # Index of the bucket this row falls into, counted from stats_start.
            time_bucket = int((parse_snuba_datetime(x["bucketed_started"]) -
                               stats_start).total_seconds() / stats_rollup)
            rv[x["project_id"], x["release"]]["stats"][health_stats_period][
                time_bucket][1] = x[stat]

    return rv
def query(**kwargs):
    """
    Run ``snuba.raw_query`` with the given arguments, forcing the ``health``
    referrer and enabling totals.
    """
    kwargs.update(referrer='health', totals=True)
    return snuba.raw_query(**kwargs)
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    use_aggregate_conditions (bool) Set to true to pass the aggregate ("having")
                    conditions parsed from the query on to snuba. When false an
                    empty list is used.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.

    Returns the output of transform_results for the snuba result.

    Raises InvalidSearchQuery when no columns are selected, or when an
    aggregate is used in a condition without being a selected column.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, function_translations = transform_deprecated_functions_in_columns(
        selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim once all of
    # discover is using this module. Remember to update all the functions
    # in this module.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "orderby": orderby,
        "having": [],
    }
    if use_aggregate_conditions:
        snuba_args["having"] = snuba_filter.having

    # We need to run a separate query to be able to properly bucket the values for the histogram
    # Do that here, and format the bucket number in to the columns before passing it through
    # to event search.
    # Only the first histogram column is rewritten, hence the break.
    for idx, col in enumerate(selected_columns):
        if col.startswith("histogram("):
            histogram_column = find_histogram_buckets(col, params, snuba_filter.conditions)
            selected_columns[idx] = histogram_column
            function_translations[get_function_alias(
                histogram_column)] = get_function_alias(col)
            break

    # Check to see if we are ordering by any functions and convert the orderby to be the correct alias.
    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        new_orderby = []
        for ordering in orderby:
            is_reversed = ordering.startswith("-")
            ordering = ordering.lstrip("-")
            for snuba_name, sentry_name in six.iteritems(function_translations):
                if sentry_name == ordering:
                    ordering = snuba_name
                    break

            ordering = "{}{}".format("-" if is_reversed else "", ordering)
            new_orderby.append(ordering)

        snuba_args["orderby"] = new_orderby

    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, params=params, auto_fields=auto_fields))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(
        snuba_args, function_translations)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_args.get("having"):
        found = any(having_clause[0] == agg_clause[-1]
                    for agg_clause in snuba_args.get("aggregations"))
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column."
                .format(having_clause[0]))

    if conditions is not None:
        snuba_args["conditions"].extend(conditions)

    result = raw_query(
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        groupby=snuba_args.get("groupby"),
        conditions=snuba_args.get("conditions"),
        aggregations=snuba_args.get("aggregations"),
        selected_columns=snuba_args.get("selected_columns"),
        filter_keys=snuba_args.get("filter_keys"),
        having=snuba_args.get("having"),
        orderby=snuba_args.get("orderby"),
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_args)
def histogram_query(
    fields,
    user_query,
    params,
    num_buckets,
    precision=0,
    min_value=None,
    max_value=None,
    data_filter=None,
    referrer=None,
    group_by=None,
    order_by=None,
    limit_by=None,
    extra_conditions=None,
    normalize_results=True,
):
    """
    API for generating histograms for numeric columns.

    A multihistogram is possible only if the columns are all array columns.
    Array columns are columns whose values are nested arrays.
    Measurements and span op breakdowns are examples of array columns.
    The resulting histograms will have their bins aligned.

    :param [str] fields: The list of fields for which you want to generate histograms for.
    :param str user_query: Filter query string to create conditions from.
    :param {str: str} params: Filtering parameters with start, end, project_id, environment
    :param int num_buckets: The number of buckets the histogram should contain.
    :param int precision: The number of decimal places to preserve, default 0.
    :param float min_value: The minimum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param float max_value: The maximum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param str data_filter: Indicate the filter strategy to be applied to the data.
    :param str referrer: Referrer string passed through to the snuba query.
    :param [str] group_by: Experimental. Allows additional grouping to serve multifacet histograms.
    :param [str] order_by: Experimental. Allows additional ordering within each alias to serve multifacet histograms.
    :param [str] limit_by: Experimental. Allows limiting within a group when serving multifacet histograms.
    :param [str] extra_conditions: Adds any additional conditions to the histogram query that aren't received from params.
    :param bool normalize_results: Indicate whether to normalize the results by column into bins.

    Returns the output of `normalize_histogram_results`, or the transformed
    query results directly when `normalize_results` is false.

    Raises InvalidSearchQuery when several fields are given that are neither
    all measurements nor all span op breakdowns.
    """

    multiplier = int(10**precision)
    if max_value is not None:
        # We want the specified max_value to be exclusive, and the queried max_value
        # to be inclusive. So we adjust the specified max_value using the multiplier.
        max_value -= 0.1 / multiplier

    min_value, max_value = find_histogram_min_max(fields, min_value, max_value, user_query,
                                                  params, data_filter)

    key_column = None
    array_column = None
    histogram_function = None
    conditions = []
    if len(fields) > 1:
        array_column = check_multihistogram_fields(fields)
        if array_column == "measurements":
            key_column = "array_join(measurements_key)"
            histogram_function = get_measurement_name
        elif array_column == "span_op_breakdowns":
            key_column = "array_join(span_op_breakdowns_key)"
            histogram_function = get_span_op_breakdown_name
        else:
            raise InvalidSearchQuery(
                "multihistogram expected either all measurements or all breakdowns"
            )

        # Restrict the joined key column to the requested fields.
        key_alias = get_function_alias(key_column)
        field_names = [histogram_function(field) for field in fields]
        conditions.append([key_alias, "IN", field_names])

    if extra_conditions:
        conditions.extend(extra_conditions)

    histogram_params = find_histogram_params(num_buckets, min_value, max_value, multiplier)
    histogram_column = get_histogram_column(fields, key_column, histogram_params, array_column)
    histogram_alias = get_function_alias(histogram_column)

    # Without both bounds no query is run; an empty data set is normalized.
    if min_value is None or max_value is None:
        return normalize_histogram_results(fields, key_column, histogram_params, {"data": []},
                                           array_column)

    # make sure to bound the bins to get the desired range of results
    # (both checks below are always true here because of the early return
    # above)
    if min_value is not None:
        min_bin = histogram_params.start_offset
        conditions.append([histogram_alias, ">=", min_bin])
    if max_value is not None:
        max_bin = histogram_params.start_offset + histogram_params.bucket_size * num_buckets
        conditions.append([histogram_alias, "<=", max_bin])

    columns = [] if key_column is None else [key_column]
    limit = len(fields) * num_buckets

    # Note: this local name shadows the enclosing function inside its body.
    histogram_query = prepare_discover_query(
        selected_columns=columns + [histogram_column, "count()"],
        conditions=conditions,
        query=user_query,
        params=params,
        orderby=(order_by if order_by else []) + [histogram_alias],
        functions_acl=["array_join", "histogram"],
    )

    snuba_filter = histogram_query.filter

    if group_by:
        snuba_filter.groupby += group_by

    result = raw_query(
        start=snuba_filter.start,
        end=snuba_filter.end,
        groupby=snuba_filter.groupby,
        conditions=snuba_filter.conditions,
        aggregations=snuba_filter.aggregations,
        selected_columns=snuba_filter.selected_columns,
        filter_keys=snuba_filter.filter_keys,
        having=snuba_filter.having,
        orderby=snuba_filter.orderby,
        dataset=Dataset.Discover,
        limitby=limit_by,
        limit=limit,
        referrer=referrer,
    )

    results = transform_results(
        result,
        histogram_query.fields["functions"],
        histogram_query.columns,
        snuba_filter,
    )

    if not normalize_results:
        return results

    return normalize_histogram_results(fields, key_column, histogram_params, results,
                                       array_column)
def snuba_search(start, end, project_ids, environment_ids, sort_field,
                 cursor=None, candidate_ids=None, limit=None, offset=0,
                 get_sample=False, search_filters=None):
    """
    Run the Snuba half of an issue search.

    This function doesn't strictly benefit from or require being pulled out
    of the main query method, but that method is already large and this
    function at least extracts most of the Snuba-specific logic.

    start, end: time range handed to Snuba.
    project_ids: values for the ``project_id`` filter key.
    environment_ids: values for the ``environment`` filter key; ``None``
        means "do not filter by environment".
    sort_field: alias (a key of ``aggregation_defs``) to order groups by.
    cursor: optional cursor; adds a HAVING bound on ``sort_field``.
    candidate_ids: optional group ids used as the ``issue`` filter key.
    limit, offset: paging values forwarded to Snuba.
    get_sample: when true, order by a hash of the issue id instead of the
        score and run the query in turbo mode.
    search_filters: optional iterable of search filters; ``None`` is
        treated as "no filters".

    Returns a tuple of:
     * a list of (group_id, group_score) tuples in the order Snuba returned
       them (descending by score unless sampling),
     * the count of total results (rows) available for this query.
    """
    filters = {
        'project_id': project_ids,
    }

    if environment_ids is not None:
        filters['environment'] = environment_ids

    if candidate_ids:
        filters['issue'] = candidate_ids

    conditions = []
    having = []
    # `search_filters` defaults to None, which used to blow up with a
    # TypeError here; an absent filter list simply means "no filters".
    for search_filter in search_filters or ():
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields or
            # We special case date
            search_filter.key.name == 'date'
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tags that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, 'total'] + extra_aggregations)
    # Every alias referenced in HAVING must also be computed.
    required_aggregations.update(h[0] for h in having)

    aggregations = [
        aggregation_defs[alias] + [alias] for alias in required_aggregations
    ]

    if cursor is not None:
        having.append((sort_field, '>=' if cursor.is_prev else '<=', cursor.value))

    selected_columns = []
    if get_sample:
        # hashlib needs bytes; repr() output is encoded explicitly so this
        # also works where str is text. For ASCII reprs the digest is the
        # same as hashing the raw str.
        query_hash = md5(repr(conditions).encode('utf-8')).hexdigest()[:8]
        selected_columns.append(('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample'))
        sort_field = 'sample'
        orderby = [sort_field]
        referrer = 'search_sample'
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ['-{}'.format(sort_field), 'issue']  # ensure stable sort within the same score
        referrer = 'search'

    snuba_results = snuba.raw_query(
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        totals=True,
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results['data']
    total = snuba_results['totals']['total']

    if not get_sample:
        metrics.timing('snuba.search.num_result_groups', len(rows))

    # Note: in sampling mode `sort_field` was rebound to 'sample' above.
    return [(row['issue'], row[sort_field]) for row in rows], total
def snuba_search(start, end, project_ids, environment_ids, sort_field,
                 cursor=None, candidate_ids=None, limit=None, offset=0,
                 get_sample=False, search_filters=None):
    """
    Run the Snuba half of an issue search.

    This function doesn't strictly benefit from or require being pulled out
    of the main query method, but that method is already large and this
    function at least extracts most of the Snuba-specific logic.

    start, end: time range handed to Snuba.
    project_ids: values for the ``project_id`` filter key.
    environment_ids: values for the ``environment`` filter key; ``None``
        means "do not filter by environment".
    sort_field: alias (a key of ``aggregation_defs``) to order groups by.
    cursor: optional cursor; adds a HAVING bound on ``sort_field``.
    candidate_ids: optional group ids used as the ``issue`` filter key.
    limit, offset: paging values forwarded to Snuba.
    get_sample: when true, order by a hash of the issue id instead of the
        score and run the query in turbo mode.
    search_filters: optional iterable of search filters; ``None`` is
        treated as "no filters".

    Returns a tuple of:
     * a list of (group_id, group_score) tuples in the order Snuba returned
       them (descending by score unless sampling),
     * the count of total results (rows) available for this query.
    """
    filters = {
        'project_id': project_ids,
    }

    if environment_ids is not None:
        filters['environment'] = environment_ids

    if candidate_ids:
        filters['issue'] = candidate_ids

    conditions = []
    having = []
    # `search_filters` defaults to None, which used to blow up with a
    # TypeError here; an absent filter list simply means "no filters".
    for search_filter in search_filters or ():
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields or
            # We special case date
            search_filter.key.name == 'date'
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tags that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, 'total'] + extra_aggregations)
    # Every alias referenced in HAVING must also be computed.
    required_aggregations.update(h[0] for h in having)

    aggregations = [
        aggregation_defs[alias] + [alias] for alias in required_aggregations
    ]

    if cursor is not None:
        having.append(
            (sort_field, '>=' if cursor.is_prev else '<=', cursor.value))

    selected_columns = []
    if get_sample:
        # hashlib needs bytes; repr() output is encoded explicitly so this
        # also works where str is text. For ASCII reprs the digest is the
        # same as hashing the raw str.
        query_hash = md5(repr(conditions).encode('utf-8')).hexdigest()[:8]
        selected_columns.append(
            ('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample'))
        sort_field = 'sample'
        orderby = [sort_field]
        referrer = 'search_sample'
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ['-{}'.format(sort_field), 'issue']  # ensure stable sort within the same score
        referrer = 'search'

    snuba_results = snuba.raw_query(
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        totals=True,
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results['data']
    total = snuba_results['totals']['total']

    if not get_sample:
        metrics.timing('snuba.search.num_result_groups', len(rows))

    # Note: in sampling mode `sort_field` was rebound to 'sample' above.
    return [(row['issue'], row[sort_field]) for row in rows], total
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
):
    """
    High-level API for doing arbitrary user queries against events.

    Works on the Discover public event schema; virtual fields and aggregate
    functions are supported in both the selected columns and the conditions.
    Internal field names in the result are mapped back to their public
    schema names before returning.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    use_aggregate_conditions (bool) When true the aggregate (having) conditions parsed
                    from the query are sent to snuba; otherwise they are dropped.

    Raises InvalidSearchQuery when no columns are selected, or when an
    aggregate condition refers to an aggregate that is not selected.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim once all of
    # discover is using this module. Remember to update all the functions
    # in this module.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "orderby": orderby,
        # Aggregate conditions are opt-in.
        "having": snuba_filter.having if use_aggregate_conditions else [],
    }

    resolved = resolve_field_list(selected_columns, snuba_args, auto_fields=auto_fields)
    snuba_args.update(resolved)

    ref_conditions = (
        create_reference_event_conditions(reference_event) if reference_event else None
    )
    if ref_conditions:
        snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_args.get("having"):
        wanted_alias = having_clause[0]
        is_selected = any(
            wanted_alias == aggregation[-1] for aggregation in snuba_args.get("aggregations")
        )
        if is_selected:
            continue
        raise InvalidSearchQuery(
            u"Aggregate {} used in a condition but is not a selected column.".format(wanted_alias)
        )

    # Everything below is read straight out of the translated arguments.
    passthrough_keys = (
        "start",
        "end",
        "groupby",
        "conditions",
        "aggregations",
        "selected_columns",
        "filter_keys",
        "having",
        "orderby",
    )
    query_kwargs = {key: snuba_args.get(key) for key in passthrough_keys}

    result = raw_query(
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
        **query_kwargs
    )

    return transform_results(result, translated_columns, snuba_args)
def timeseries_query(selected_columns, query, params, rollup, reference_event=None, referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    rollup (int) The bucket width in seconds
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Raises InvalidSearchQuery when the time range is incomplete or when the
    selected columns resolve to no aggregation.
    """
    snuba_filter = get_filter(query, params)
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "having": snuba_filter.having,
    }
    # Both edges of the range are needed further down (zerofill walks from
    # start to end), so reject the query when *either* one is missing rather
    # than only when both are.
    if not snuba_args["start"] or not snuba_args["end"]:
        raise InvalidSearchQuery("Cannot get timeseries result without a start and end.")

    snuba_args.update(resolve_field_list(selected_columns, snuba_args, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, _ = resolve_discover_aliases(snuba_args)
    if not snuba_args["aggregations"]:
        raise InvalidSearchQuery("Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures compatibility
    # with other parts of the timeseries endpoint expectations
    if len(snuba_args["aggregations"]) == 1:
        snuba_args["aggregations"][0][2] = "count"

    result = raw_query(
        aggregations=snuba_args.get("aggregations"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    # Fill buckets that had no rows so the series is continuous.
    result = zerofill(result["data"], snuba_args["start"], snuba_args["end"], rollup, "time")

    return SnubaTSResult({"data": result}, snuba_filter.start, snuba_filter.end, rollup)
def get_facets(query, params, limit=10, referrer=None):
    """
    High-level API for getting 'facet map' results.

    Facets are high frequency tags and attribute results that
    can be used to further refine user queries. When many projects
    are requested sampling will be enabled to help keep response times low.

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    def count_query(conditions, **query_kwargs):
        # Every query in this function is a plain count over the same time
        # range and filter keys; only grouping/ordering/sampling vary.
        return raw_query(
            aggregations=[["count", None, "count"]],
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            conditions=conditions,
            filter_keys=snuba_args.get("filter_keys"),
            dataset=Dataset.Discover,
            referrer=referrer,
            **query_kwargs
        )

    # Exclude tracing tags as they are noisy and generally not helpful.
    excluded_tags = ["tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span", "project"]]

    # Sampling keys for multi-project results as we don't need accuracy
    # with that much data.
    sample = len(snuba_filter.filter_keys["project_id"]) > 2

    # Get the most frequent tag keys
    key_names = count_query(
        snuba_args.get("conditions"),
        orderby=["-count", "tags_key"],
        groupby="tags_key",
        having=[excluded_tags],
        limit=limit,
        turbo=sample,
    )
    top_tags = [row["tags_key"] for row in key_names["data"]]
    if not top_tags:
        return []

    # TODO(mark) Make the sampling rate scale based on the result size and scaling factor in
    # sentry.options. To test the lowest acceptable sampling rate, we use 0.1 which
    # is equivalent to turbo. We don't use turbo though as we need to re-scale data, and
    # using turbo could cause results to be wrong if the value of turbo is changed in snuba.
    if key_names["data"][0]["count"] > 10000:
        sample_rate = 0.1
    else:
        sample_rate = None
    # Rescale the results if we're sampling
    multiplier = 1 / sample_rate if sample_rate is not None else 1

    results = []

    # With several projects requested, "project" becomes a facet of its own;
    # when the tag list is already full the last tag gives up its slot.
    fetch_projects = len(params.get("project_id", [])) > 1
    if fetch_projects:
        if len(top_tags) == limit:
            top_tags.pop()

        project_values = count_query(
            snuba_args.get("conditions"),
            groupby="project_id",
            orderby="-count",
            sample=sample_rate,
            # Ensures Snuba will not apply FINAL
            turbo=sample_rate is not None,
        )
        results.extend(
            [
                FacetResult("project", row["project_id"], int(row["count"]) * multiplier)
                for row in project_values["data"]
            ]
        )

    # Get tag counts for our top tags. Fetching them individually
    # allows snuba to leverage promoted tags better and enables us to get
    # the value count we want.
    max_aggregate_tags = options.get("discover2.max_tags_to_combine")
    first_aggregate_index = len(top_tags) - max_aggregate_tags
    individual_tags = []
    aggregate_tags = []
    for position, tag in enumerate(top_tags):
        # Add here tags that you want to be individual
        always_individual = tag == "environment"
        if not always_individual and position >= first_aggregate_index:
            aggregate_tags.append(tag)
        else:
            individual_tags.append(tag)

    for tag_name in individual_tags:
        tag = u"tags[{}]".format(tag_name)
        tag_values = count_query(
            snuba_args.get("conditions"),
            orderby=["-count"],
            groupby=[tag],
            limit=TOP_VALUES_DEFAULT_LIMIT,
            sample=sample_rate,
            # Ensures Snuba will not apply FINAL
            turbo=sample_rate is not None,
        )
        results.extend(
            [
                FacetResult(tag_name, row[tag], int(row["count"]) * multiplier)
                for row in tag_values["data"]
            ]
        )

    if aggregate_tags:
        # The remaining tags are fetched together in a single query.
        conditions = snuba_args.get("conditions", [])
        conditions.append(["tags_key", "IN", aggregate_tags])
        tag_values = count_query(
            conditions,
            orderby=["tags_key", "-count"],
            groupby=["tags_key", "tags_value"],
            sample=sample_rate,
            # Ensures Snuba will not apply FINAL
            turbo=sample_rate is not None,
            limitby=[TOP_VALUES_DEFAULT_LIMIT, "tags_key"],
        )
        results.extend(
            [
                FacetResult(row["tags_key"], row["tags_value"], int(row["count"]) * multiplier)
                for row in tag_values["data"]
            ]
        )

    return results
def get_attrs(self, item_list, user):
    """
    Build the per-item attribute dict used when serializing ``item_list``.

    The base attributes come either from the parent serializer or, when the
    "base" section is collapsed, from ``_get_seen_stats`` only. Optional
    sections ("stats", "filtered", "sessions", "inbox", "owners") are then
    merged in depending on ``_collapse`` / ``_expand``.

    item_list: the items being serialized; each is used as a key of the
        returned dict and is read for ``id`` and ``project_id``.
    user: forwarded to the parent serializer and the stats helpers.

    Returns a dict mapping each item to its attribute dict.
    """
    if not self._collapse("base"):
        attrs = super().get_attrs(item_list, user)
    else:
        # Collapsed base: start from the seen stats only (or empty dicts).
        seen_stats = self._get_seen_stats(item_list, user)
        if seen_stats:
            attrs = {item: seen_stats.get(item, {}) for item in item_list}
        else:
            attrs = {item: {} for item in item_list}

    if self.stats_period and not self._collapse("stats"):
        partial_get_stats = functools.partial(
            self.get_stats,
            item_list=item_list,
            user=user,
            environment_ids=self.environment_ids)
        stats = partial_get_stats()
        # Same stats call again, narrowed by the search conditions; skipped
        # when there are no conditions or "filtered" is collapsed.
        filtered_stats = (partial_get_stats(
            conditions=self.conditions) if self.conditions
                          and not self._collapse("filtered") else None)
        for item in item_list:
            if filtered_stats:
                attrs[item].update(
                    {"filtered_stats": filtered_stats[item.id]})
            attrs[item].update({"stats": stats[item.id]})

        # NOTE(review): this block is placed inside the stats guard above;
        # confirm that nesting against the original file layout.
        if self._expand("sessions"):
            uniq_project_ids = list(
                {item.project_id for item in item_list})
            # Session counts are cached per project, not per item.
            cache_keys = {
                pid: self._build_session_cache_key(pid)
                for pid in uniq_project_ids
            }
            cache_data = cache.get_many(cache_keys.values())
            missed_items = []
            for item in item_list:
                num_sessions = cache_data.get(cache_keys[item.project_id])
                if num_sessions is None:
                    found = "miss"
                    missed_items.append(item)
                else:
                    found = "hit"
                    attrs[item].update({
                        "sessionCount": num_sessions,
                    })
                metrics.incr(f"group.get_session_counts.{found}")

            if missed_items:
                # One query covers every project that missed the cache.
                filters = {
                    "project_id":
                    list({item.project_id
                          for item in missed_items})
                }
                if self.environment_ids:
                    filters["environment"] = self.environment_ids

                result_totals = raw_query(
                    selected_columns=["sessions"],
                    dataset=Dataset.Sessions,
                    start=self.start,
                    end=self.end,
                    filter_keys=filters,
                    groupby=["project_id"],
                    referrer=
                    "serializers.GroupSerializerSnuba.session_totals",
                )

                results = {}
                for data in result_totals["data"]:
                    cache_key = self._build_session_cache_key(
                        data["project_id"])
                    results[data["project_id"]] = data["sessions"]
                    # Cached with a timeout argument of 3600.
                    cache.set(cache_key, data["sessions"], 3600)

                for item in missed_items:
                    if item.project_id in results.keys():
                        attrs[item].update({
                            "sessionCount":
                            results[item.project_id],
                        })
                    else:
                        # No row came back for this project.
                        attrs[item].update({"sessionCount": None})

    if self._expand("inbox"):
        inbox_stats = get_inbox_details(item_list)
        for item in item_list:
            attrs[item].update({"inbox": inbox_stats.get(item.id)})

    if self._expand("owners"):
        owner_details = get_owner_details(item_list)
        for item in item_list:
            attrs[item].update({"owners": owner_details.get(item.id)})

    return attrs
def validate(self, data):
    """
    Validate the data of an alert rule.

    Checks that the query and aggregate build into a snuba filter and can
    actually be run, that there are one or two triggers carrying the
    expected labels (critical first, then an optional warning), that any
    event types are valid for the dataset, and that the trigger thresholds
    are consistent. The critical trigger should both alert and resolve
    'after' the warning trigger (whether that means > or < the value depends
    on threshold type).

    Returns ``data`` (with ``dataset`` defaulted) when everything passes;
    raises ``serializers.ValidationError`` otherwise.
    """
    data.setdefault("dataset", QueryDatasets.EVENTS)

    projects = data.get("projects")
    if not projects:
        # We just need a valid project id from the org so that we can verify
        # the query. We don't use the returned data anywhere, so it doesn't
        # matter which.
        projects = list(self.context["organization"].project_set.all()[:1])

    try:
        snuba_filter = build_snuba_filter(
            data["dataset"],
            data["query"],
            data["aggregate"],
            data.get("environment"),
            data.get("event_types"),
            params={
                "project_id": [project.id for project in projects],
                "start": timezone.now() - timedelta(minutes=10),
                "end": timezone.now(),
            },
        )
        uses_project_term = any(
            condition[0] == "project_id" for condition in snuba_filter.conditions
        )
        if uses_project_term:
            raise serializers.ValidationError({"query": "Project is an invalid search term"})
    except (InvalidSearchQuery, ValueError) as e:
        raise serializers.ValidationError(f"Invalid Query or Metric: {e}")

    # The handler above always raises, so from here on the filter was built.
    if not snuba_filter.aggregations:
        raise serializers.ValidationError(
            "Invalid Metric: Please pass a valid function for aggregation"
        )

    # Dry-run the query against snuba to make sure it is actually runnable.
    try:
        raw_query(
            aggregations=snuba_filter.aggregations,
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            dataset=Dataset(data["dataset"].value),
            limit=1,
            referrer="alertruleserializer.test_query",
        )
    except Exception:
        logger.exception("Error while validating snuba alert rule query")
        raise serializers.ValidationError(
            "Invalid Query or Metric: An error occurred while attempting "
            "to run the query"
        )

    triggers = data.get("triggers", [])
    if not triggers:
        raise serializers.ValidationError("Must include at least one trigger")
    if len(triggers) > 2:
        raise serializers.ValidationError(
            "Must send 1 or 2 triggers - A critical trigger, and an optional warning trigger"
        )

    event_types = data.get("event_types")
    valid_event_types = dataset_valid_event_types[data["dataset"]]
    if event_types and set(event_types) - valid_event_types:
        raise serializers.ValidationError(
            "Invalid event types for this dataset. Valid event types are %s"
            % sorted([et.name.lower() for et in valid_event_types])
        )

    # Triggers are positional: critical first, then the optional warning.
    expected_labels = (CRITICAL_TRIGGER_LABEL, WARNING_TRIGGER_LABEL)
    for position, (trigger, expected_label) in enumerate(
        zip(triggers, expected_labels), start=1
    ):
        if trigger.get("label", None) == expected_label:
            continue
        raise serializers.ValidationError(
            f'Trigger {position} must be labeled "{expected_label}"'
        )

    threshold_type = data["threshold_type"]
    critical = triggers[0]
    self._validate_trigger_thresholds(threshold_type, critical, data.get("resolve_threshold"))

    if len(triggers) == 2:
        warning = triggers[1]
        self._validate_trigger_thresholds(
            threshold_type, warning, data.get("resolve_threshold")
        )
        self._validate_critical_warning_triggers(threshold_type, critical, warning)

    return data
def _get_release_sessions_time_bounds(project_id, release, org_id, environments=None):
    """
    Get the sessions time bounds, i.e. when the first session started and
    when the last session started, for a specific
    (project_id, org_id, release, environments) combination.

    Inputs:
        * project_id
        * release
        * org_id: Organisation Id
        * environments: optional list; ``None`` means no environment filter
    Return:
        Dictionary with two keys "sessions_lower_bound" and
        "sessions_upper_bound" holding when the first session occurred and
        when the last session occurred respectively. Both are ``None`` when
        no sessions were found.
    """
    snuba_datetime_format = "%Y-%m-%dT%H:%M:%S+00:00"

    def iso_format_snuba_datetime(date):
        parsed = datetime.strptime(date, snuba_datetime_format)
        return parsed.isoformat()[:19] + "Z"

    no_bounds = {
        "sessions_lower_bound": None,
        "sessions_upper_bound": None,
    }

    conditions = [["release", "=", release]]
    if environments is not None:
        conditions.append(["environment", "IN", environments])

    rows = raw_query(
        dataset=Dataset.Sessions,
        selected_columns=["first_session_started", "last_session_started"],
        aggregations=[
            ["min(started)", None, "first_session_started"],
            ["max(started)", None, "last_session_started"],
        ],
        conditions=conditions,
        filter_keys={"project_id": [project_id], "org_id": [org_id]},
        referrer="sessions.release-sessions-time-bounds",
    )["data"]

    if not rows:
        return no_bounds

    first_row = rows[0]
    epoch_as_snuba_datetime = datetime.utcfromtimestamp(0).strftime(snuba_datetime_format)

    # This check is added because if there are no sessions found, then the
    # aggregations query return both the sessions_lower_bound and the
    # sessions_upper_bound as `0` timestamp and we do not want that behaviour
    # by default
    # P.S. To avoid confusion the `0` timestamp which is '1970-01-01 00:00:00'
    # is rendered as '0000-00-00 00:00:00' in clickhouse shell
    if set(first_row.values()) == {epoch_as_snuba_datetime}:
        return no_bounds

    return {
        "sessions_lower_bound": iso_format_snuba_datetime(first_row["first_session_started"]),
        "sessions_upper_bound": iso_format_snuba_datetime(first_row["last_session_started"]),
    }
def get_performance_facets(
    query,
    params,
    orderby=None,
    aggregate_column="duration",
    aggregate_function="avg",
    limit=20,
    referrer=None,
):
    """
    High-level API for getting 'facet map' results for performance data

    Performance facets are high frequency tags and the aggregate duration of
    their most frequent values

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|Sequence[str]) Ordering for the tag value query; "tags_key" is
                    appended to it. None is treated as an empty list.
    aggregate_column (str) Column the aggregate is computed over. Tag keys are ranked by
                    the sample standard deviation (stddevSamp) of this column.
    aggregate_function (str) Aggregate function applied to `aggregate_column` for each
                    tag key/value pair.
    limit (int) The number of tag keys to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult], one per (tags_key, tags_value) row with the aggregate
    truncated to an int; an empty list when no tag keys are found.
    """
    # NOTE(review): the extent of each tracing span below is a best guess;
    # confirm which statements belong inside each `with` block.
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)

    # Exclude tracing tags as they are noisy and generally not helpful.
    # TODO(markus): Tracing tags are no longer written but may still reside in DB.
    excluded_tags = ["tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span", "project"]]

    # Sampling keys for multi-project results as we don't need accuracy
    # with that much data.
    sample = len(snuba_filter.filter_keys["project_id"]) > 2

    with sentry_sdk.start_span(op="discover.discover", description="facets.frequent_tags"):
        # Get the tag keys with the highest deviation
        key_names = raw_query(
            aggregations=[["stddevSamp", aggregate_column, "stddev"]],
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            orderby=["-stddev", "tags_key"],
            groupby="tags_key",
            # TODO(Kevan): Check using having vs where before mainlining
            having=[excluded_tags],
            dataset=Dataset.Discover,
            limit=limit,
            referrer=referrer,
            turbo=sample,
        )
        top_tags = [r["tags_key"] for r in key_names["data"]]
        if not top_tags:
            return []

    results = []

    # Sampling is hard-wired on here; the rate comes from options and falls
    # back to 0.1 when the option is unset or falsy.
    sampling_enabled = True
    options_sample_rate = options.get("discover2.tags_performance_facet_sample_rate") or 0.1

    sample_rate = options_sample_rate if sampling_enabled else None

    # Only the last `max_aggregate_tags` entries of top_tags are queried.
    max_aggregate_tags = 20
    aggregate_tags = []
    for i, tag in enumerate(top_tags):
        if i >= len(top_tags) - max_aggregate_tags:
            aggregate_tags.append(tag)

    if orderby is None:
        orderby = []

    if aggregate_tags:
        with sentry_sdk.start_span(op="discover.discover", description="facets.aggregate_tags"):
            # NOTE: this appends to the filter's own conditions list in place.
            conditions = snuba_filter.conditions
            conditions.append(["tags_key", "IN", aggregate_tags])
            tag_values = raw_query(
                aggregations=[[aggregate_function, aggregate_column, "aggregate"]],
                conditions=conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=orderby + ["tags_key"],
                groupby=["tags_key", "tags_value"],
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                turbo=sample_rate is not None,
                limitby=[TOP_VALUES_DEFAULT_LIMIT, "tags_key"],
            )
            results.extend(
                [
                    FacetResult(r["tags_key"], r["tags_value"], int(r["aggregate"]))
                    for r in tag_values["data"]
                ]
            )

    return results
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    functions_acl (None|Sequence[str]) Forwarded unchanged to resolve_field_list.

    Raises InvalidSearchQuery when no columns are selected or when an aggregate used in a
    condition is not among the resolved aggregations.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # We clobber this value throughout this code, so copy the value
    selected_columns = selected_columns[:]

    # NOTE(review): the extent of each tracing span below is a best guess;
    # confirm which statements belong inside each `with` block.
    with sentry_sdk.start_span(
        op="discover.discover", description="query.filter_transform"
    ) as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            # Aggregate conditions are disabled: drop whatever was parsed.
            snuba_filter.having = []

    function_translations = {}

    with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"):
        if orderby is not None:
            # Accept a single field as well as a list/tuple of fields.
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations
        )

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                # Worklist of argument lists still to inspect.
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            # Boolean combinator: queue its operands.
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if its a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                            )
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}.".format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        )
                    )
            else:
                # Plain alias: it must match the alias of some aggregation.
                found = any(
                    having_clause[0] == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                )
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}.".format(
                            having_clause[0],
                            error_extra,
                        )
                    )

        # Caller-supplied raw conditions are appended as-is.
        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover", description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="query.transform_results"
    ) as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(
            result, resolved_fields["functions"], translated_columns, snuba_filter, selected_columns
        )
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
    top_events=None,
    allow_empty=True,
):
    """
    High-level API for running a user timeseries query restricted to a limited
    number of top events.

    The top events are fetched first (unless the caller passes them in), the
    timeseries query is then narrowed to the values those events have for each
    selected column, and the resulting rows are split up per top event.

    Returns a dictionary mapping each top event's result key to a zerofilled
    SnubaTSResult that also carries the event's "order". When allow_empty is
    falsy and the timeseries query returns no rows, a single zerofilled
    SnubaTSResult is returned instead of a dictionary.

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                    usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                    this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. Named user_query so it does
                    not conflict with the query function
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order the top events by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    top_events (dict|None) A dictionary with a 'data' key containing a list of dictionaries that
                    represent the top events matching the query. Pass it when the top events were
                    already fetched, to save a query.
    allow_empty (bool) When False, an empty timeseries result short-circuits to a single
                    zerofilled SnubaTSResult.
    """
    if top_events is None:
        with sentry_sdk.start_span(op="discover.discover", description="top_events.fetch_events"):
            top_events = query(
                selected_columns,
                query=user_query,
                params=params,
                orderby=orderby,
                limit=limit,
                referrer=referrer,
                auto_aggregations=True,
                use_aggregate_conditions=True,
            )

    with sentry_sdk.start_span(
        op="discover.discover", description="top_events.filter_transform"
    ) as span:
        span.set_data("query", user_query)
        snuba_filter, translated_columns = get_timeseries_snuba_filter(
            sorted(set(timeseries_columns + selected_columns)),
            user_query,
            params,
            rollup,
            default_count=False,
        )

        for column in selected_columns:
            # A project column narrows the query to the projects of the top events
            # so we don't hit the result limit.
            if column in ("project", "project.id") and top_events["data"]:
                snuba_filter.project_ids = [row["project.id"] for row in top_events["data"]]
                continue

            field = FIELD_ALIASES[column].alias if column in FIELD_ALIASES else column

            # Collect the distinct values the top events have for this field.
            # List (array) values are left out of the filter.
            distinct_values = set()
            for event in top_events["data"]:
                if field not in event:
                    continue
                candidate = event.get(field)
                if not isinstance(candidate, list):
                    distinct_values.add(candidate)
            values = list(distinct_values)
            if not values:
                continue

            if field == "timestamp" or field.startswith("timestamp.to_"):
                # timestamp fields need special handling: a big OR instead of an IN
                condition = [[field, "=", stamp] for stamp in sorted(values)]
            elif None in values:
                # None cannot go into an IN list, so OR an isNull check with
                # an IN over whatever real values remain.
                condition = [[["isNull", [resolve_discover_column(field)]], "=", 1]]
                present_values = [item for item in values if item is not None]
                if present_values:
                    condition.append([resolve_discover_column(field), "IN", present_values])
            elif field in FIELD_ALIASES:
                condition = [field, "IN", values]
            else:
                condition = [resolve_discover_column(field), "IN", values]
            snuba_filter.conditions.append(condition)

    with sentry_sdk.start_span(op="discover.discover", description="top_events.snuba_query"):
        result = raw_query(
            aggregations=snuba_filter.aggregations,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            selected_columns=snuba_filter.selected_columns,
            start=snuba_filter.start,
            end=snuba_filter.end,
            rollup=rollup,
            orderby=["time"] + snuba_filter.groupby,
            groupby=["time"] + snuba_filter.groupby,
            dataset=Dataset.Discover,
            limit=10000,
            referrer=referrer,
        )

    if not allow_empty and len(result.get("data", [])) == 0:
        empty_series = zerofill([], snuba_filter.start, snuba_filter.end, rollup, "time")
        return SnubaTSResult(
            {"data": empty_series},
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="top_events.transform_results"
    ) as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = transform_data(result, translated_columns, snuba_filter)

        if "project" in selected_columns:
            translated_columns["project_id"] = "project"
        # Sorted so the result key is consistent.
        translated_groupby = sorted(
            translated_columns.get(name, name) for name in snuba_filter.groupby
        )

        issues = {}
        if "issue" in selected_columns:
            issue_ids = {event["issue.id"] for event in top_events["data"]}
            issues = Group.issues_mapping(issue_ids, params["project_id"], organization)

        # Seed one entry per top event so each series remembers the event's rank.
        grouped = {}
        for position, event in enumerate(top_events["data"]):
            event_key = create_result_key(event, translated_groupby, issues)
            grouped[event_key] = {"order": position, "data": []}

        for row in result["data"]:
            row_key = create_result_key(row, translated_groupby, issues)
            if row_key not in grouped:
                logger.warning(
                    "discover.top-events.timeseries.key-mismatch",
                    extra={"result_key": row_key, "top_event_keys": list(grouped.keys())},
                )
                continue
            grouped[row_key]["data"].append(row)

        results = {}
        for key, entry in grouped.items():
            results[key] = SnubaTSResult(
                {
                    "data": zerofill(
                        entry["data"], snuba_filter.start, snuba_filter.end, rollup, "time"
                    ),
                    "order": entry["order"],
                },
                snuba_filter.start,
                snuba_filter.end,
                rollup,
            )

    return results
def snuba_search(start, end, project_ids, environment_ids, tags,
                 sort_field, cursor=None, candidate_ids=None, limit=None,
                 offset=0, get_sample=False, search_filters=None,
                 use_new_filters=False, **parameters):
    """
    Run the Snuba part of a group search.

    This holds most of the Snuba-specific logic so the (already large) main
    query method does not have to.

    Returns a tuple of:
     * a list of (group_id, group_score) tuples in the order Snuba returned
       them: descending by `sort_field` (ties broken by issue), or ordered by
       the `sample` column when `get_sample` is set,
     * the count of total results (rows) available for this query.
    """
    from sentry.search.base import ANY

    filters = {'project_id': project_ids}
    for filter_key, ids in (('environment', environment_ids), ('issue', candidate_ids)):
        if ids is not None:
            filters[filter_key] = ids

    conditions = []
    if use_new_filters:
        having = []
        for search_filter in search_filters:
            name = search_filter.key.name
            # Issue fields aren't available here, and date is special cased.
            if name in issue_only_fields or name == 'date':
                continue

            converted = convert_search_filter_to_snuba_query(search_filter)
            # Filters on aggregated values go to HAVING, everything else to WHERE.
            destination = having if name in aggregation_defs else conditions
            destination.append(converted)
    else:
        having = SnubaConditionBuilder({
            'age_from': ScalarCondition('first_seen', '>'),
            'age_to': ScalarCondition('first_seen', '<'),
            'last_seen_from': ScalarCondition('last_seen', '>'),
            'last_seen_to': ScalarCondition('last_seen', '<'),
            'times_seen': CallbackCondition(
                lambda times_seen: ('times_seen', '=', times_seen),
            ),
            'times_seen_lower': ScalarCondition('times_seen', '>'),
            'times_seen_upper': ScalarCondition('times_seen', '<'),
        }).build(parameters)

        for tag, val in sorted(tags.items()):
            col = u'tags[{}]'.format(tag)
            operator, operand = ('!=', '') if val == ANY else ('=', val)
            conditions.append((col, operator, operand))

    # Every alias referenced by the sort or by a HAVING clause has to be selected.
    required_aggregations = set(
        [sort_field, 'total'] + dependency_aggregations.get(sort_field, [])
    )
    required_aggregations.update(clause[0] for clause in having)
    aggregations = [
        aggregation_defs[alias] + [alias] for alias in required_aggregations
    ]

    if cursor is not None:
        comparator = '>=' if cursor.is_prev else '<='
        having.append((sort_field, comparator, cursor.value))

    if get_sample:
        # Sample the matching groups: every group gets a single `sample` value
        # derived from a hash of the conditions and its issue id, so the sample
        # is independent of the number of events in a group. Ordering by that
        # value with a LIMIT always returns the full result set when there are
        # fewer results than the limit.
        query_hash = md5(repr(conditions)).hexdigest()[:8]
        selected_columns = [
            ('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample'),
        ]
        sort_field = 'sample'
        orderby = [sort_field]
        referrer = 'search_sample'
    else:
        # Top matching groups by score, i.e. the actual search results in the
        # order we want them; `issue` keeps the sort stable within a score.
        selected_columns = []
        orderby = ['-{}'.format(sort_field), 'issue']
        referrer = 'search'

    snuba_results = snuba.raw_query(
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        totals=True,
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results['data']
    total = snuba_results['totals']['total']

    if not get_sample:
        metrics.timing('snuba.search.num_result_groups', len(rows))

    scored_groups = [(row['issue'], row[sort_field]) for row in rows]
    return scored_groups, total
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for running arbitrary user queries against events.

    Works on the Discover public event schema; virtual fields and aggregate
    functions are supported for both selected columns and conditions. Internal
    field names in the result are mapped back to their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    functions_acl (None|any) Forwarded unchanged to prepare_discover_query.

    Raises InvalidSearchQuery when no columns are selected.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # The columns get clobbered further down, so work on a copy of the caller's value.
    columns_copy = selected_columns[:]

    snuba_query = prepare_discover_query(
        columns_copy,
        query,
        params,
        orderby,
        auto_fields,
        auto_aggregations,
        use_aggregate_conditions,
        conditions,
        functions_acl,
    )
    snuba_filter = snuba_query.filter

    with sentry_sdk.start_span(op="discover.discover", description="query.snuba_query"):
        raw_query_kwargs = dict(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )
        result = raw_query(**raw_query_kwargs)

    with sentry_sdk.start_span(
        op="discover.discover", description="query.transform_results"
    ) as span:
        row_count = len(result.get("data", []))
        span.set_data("result_count", row_count)
        return transform_results(
            result,
            snuba_query.fields["functions"],
            snuba_query.columns,
            snuba_filter,
        )
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
):
    """
    High-level API for running a user timeseries query restricted to a limited
    number of top events.

    The top events are fetched with `query`, the timeseries query is narrowed to
    the values those events have for each selected column, and the rows that
    come back are grouped per result key.

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a
    timeseries query.

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                    usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                    this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. Named user_query so it does
                    not conflict with the query function
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order the top events by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    top_events = query(
        selected_columns,
        query=user_query,
        params=params,
        orderby=orderby,
        limit=limit,
        referrer=referrer,
    )

    snuba_filter, translated_columns = get_timeseries_snuba_filter(
        timeseries_columns + selected_columns, user_query, params, rollup
    )

    user_fields = FIELD_ALIASES["user"]["fields"]

    for column in selected_columns:
        # project is handled by filter_keys already
        if column == "project" or column == "project.id":
            continue

        values = list({event.get(column) for event in top_events["data"] if column in event})
        # Only filter on columns where every top event has a real value.
        if not values or any(value is None for value in values):
            continue

        if column == "timestamp":
            # timestamp needs special handling, creating a big OR instead
            condition = [["timestamp", "=", stamp] for stamp in values]
        elif column == "user":
            # A user value can live in any of the user field aliases, so OR across all of them
            condition = [[resolve_column(alias), "IN", values] for alias in user_fields]
        else:
            condition = [resolve_column(column), "IN", values]
        snuba_filter.conditions.append(condition)

    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"] + snuba_filter.groupby,
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )

    result = transform_results(result, translated_columns, snuba_filter, selected_columns)

    translated_columns["project_id"] = "project"
    translated_groupby = [translated_columns.get(name, name) for name in snuba_filter.groupby]

    if "user" in selected_columns:
        # Prune the user related fields that weren't selected, since
        # transform_results does the same, and key on the combined user column.
        for user_field in user_fields:
            if user_field not in selected_columns:
                translated_groupby.remove(user_field)
        translated_groupby.append("user")

    issues = {}
    if "issue" in selected_columns:
        issue_ids = {event["issue.id"] for event in top_events["data"]}
        issues = Group.issues_mapping(issue_ids, params["project_id"], organization)

    # so the result key is consistent
    translated_groupby.sort()

    rows_by_key = {}
    for row in result["data"]:
        key_parts = [
            issues.get(row["issue.id"], "unknown")
            if name == "issue.id"
            else six.text_type(row.get(name))
            for name in translated_groupby
        ]
        rows_by_key.setdefault(",".join(key_parts), []).append(row)

    return {
        key: SnubaTSResult(
            {"data": zerofill(rows, snuba_filter.start, snuba_filter.end, rollup, "time")},
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )
        for key, rows in six.iteritems(rows_by_key)
    }
def get_project_release_stats(project_id, release, stat, rollup, start, end, environments=None):
    """
    Fetch bucketed and total `users` or `sessions` stats for one release.

    project_id (int) The project the release belongs to.
    release (str) The release to fetch stats for.
    stat (str) Either "users" or "sessions".
    rollup (int) The bucket width in seconds.
    start (datetime) Start of the queried range.
    end (datetime) End of the queried range, rounded up to the next DATASET_BUCKET boundary.
    environments (Sequence[str]|None) When given, only these environments are counted.

    Returns a tuple of:
     * the buckets created by _make_stats, where slot [1] of every bucket holds a
       dict with the stat, its healthy/crashed/abnormal/errored breakdown and the
       p50/p90 durations (zeros and None durations for buckets without data),
     * a dict with the totals of the stat and its breakdown over the whole range.
    """
    assert stat in ("users", "sessions")

    healthy_key = stat + "_healthy"
    crashed_key = stat + "_crashed"
    abnormal_key = stat + "_abnormal"
    errored_key = stat + "_errored"

    def breakdown(row):
        # Due to the nature of the probabilistic data structures some
        # subtractions can become negative, so derived numbers are clamped at
        # zero to avoid confusion.
        return {
            stat: row[stat],
            healthy_key: max(0, row[stat] - row[errored_key]),
            crashed_key: row[crashed_key],
            abnormal_key: row[abnormal_key],
            errored_key: max(0, row[errored_key] - row[crashed_key] - row[abnormal_key]),
        }

    # Snuba treats the end of the range as exclusive and the data is bucketed,
    # so round the end up to the next full bucket boundary.
    end = to_datetime((to_timestamp(end) // DATASET_BUCKET + 1) * DATASET_BUCKET)

    filter_keys = {"project_id": [project_id]}
    conditions = [["release", "=", release]]
    if environments is not None:
        conditions.append(["environment", "IN", environments])

    bucket_count = int((end - start).total_seconds() / rollup)
    stats = _make_stats(start, rollup, bucket_count, default=None)

    totals = {
        stat: 0,
        healthy_key: 0,
        crashed_key: 0,
        abnormal_key: 0,
        errored_key: 0,
    }

    bucketed_rows = raw_query(
        dataset=Dataset.Sessions,
        selected_columns=[
            "bucketed_started",
            stat,
            crashed_key,
            abnormal_key,
            errored_key,
            "duration_quantiles",
        ],
        groupby=["bucketed_started"],
        start=start,
        end=end,
        rollup=rollup,
        conditions=conditions,
        filter_keys=filter_keys,
    )["data"]

    for rv in bucketed_rows:
        ts = parse_snuba_datetime(rv["bucketed_started"])
        bucket = int((ts - start).total_seconds() / rollup)

        entry = breakdown(rv)
        entry["duration_p50"] = _convert_duration(rv["duration_quantiles"][0])
        entry["duration_p90"] = _convert_duration(rv["duration_quantiles"][1])
        stats[bucket][1] = entry

        # Session stats can be summed up directly as the data becomes
        # available, without another query.
        if stat == "sessions":
            for key in totals:
                totals[key] += entry[key]

    # Buckets Snuba returned no row for still get a fully shaped entry.
    for position, slot in enumerate(stats):
        if slot[1] is None:
            stats[position][1] = {
                stat: 0,
                healthy_key: 0,
                crashed_key: 0,
                abnormal_key: 0,
                errored_key: 0,
                "duration_p50": None,
                "duration_p90": None,
            }

    # For users we need a secondary query over the entire time range
    if stat == "users":
        rows = raw_query(
            dataset=Dataset.Sessions,
            selected_columns=["users", "users_crashed", "users_abnormal", "users_errored"],
            start=start,
            end=end,
            conditions=conditions,
            filter_keys=filter_keys,
        )["data"]
        if rows:
            totals = breakdown(rows[0])

    return stats, totals
def _get_release_adoption(project_releases, environments=None, now=None):
    """
    Get the adoption of the last 24 hours (or a different reference timestamp).

    Adoption is the percentage of a project's users (and sessions) that were
    seen on a given release within the window. The window starts one day before
    `now`; no explicit end is passed to the queries.

    project_releases: forwarded to _get_conditions_and_filter_keys to build the
                    release conditions and the filter keys.
    environments (Sequence[str]|None) When given, both the project totals and the
                    per-release numbers are limited to these environments.
    now (datetime|None) Reference timestamp; defaults to the current UTC time.

    Returns a dict keyed by (project_id, release) whose values hold "adoption",
    "sessions_adoption", "users_24h", "sessions_24h", "project_users_24h" and
    "project_sessions_24h". An adoption value is None when the project total it
    is computed against is missing or zero.
    """
    conditions, filter_keys = _get_conditions_and_filter_keys(project_releases, environments)
    if now is None:
        now = datetime.now(pytz.utc)
    start = now - timedelta(days=1)

    # The project-wide totals ignore the release conditions and only honor the
    # environment filter.
    total_conditions = []
    if environments is not None:
        total_conditions.append(["environment", "IN", environments])

    # Project-wide users and sessions, keyed by project id.
    total_users = {}
    total_sessions = {}
    for x in raw_query(
        dataset=Dataset.Sessions,
        selected_columns=["project_id", "users", "sessions"],
        groupby=["project_id"],
        start=start,
        conditions=total_conditions,
        filter_keys=filter_keys,
        referrer="sessions.release-adoption-total-users-and-sessions",
    )["data"]:
        total_users[x["project_id"]] = x["users"]
        total_sessions[x["project_id"]] = x["sessions"]

    rv = {}
    for x in raw_query(
        dataset=Dataset.Sessions,
        selected_columns=["release", "project_id", "users", "sessions"],
        groupby=["release", "project_id"],
        start=start,
        conditions=conditions,
        filter_keys=filter_keys,
        referrer="sessions.release-adoption-list",
    )["data"]:
        # Users adoption
        total_users_count = total_users.get(x["project_id"])
        users_adoption = None
        if total_users_count:
            users_adoption = float(x["users"]) / total_users_count * 100

        # Sessions adoption. The numerator is converted to float *before*
        # dividing so integer counts are not truncated by integer division.
        total_sessions_count = total_sessions.get(x["project_id"])
        sessions_adoption = None
        if total_sessions_count:
            sessions_adoption = float(x["sessions"]) / total_sessions_count * 100

        rv[x["project_id"], x["release"]] = {
            "adoption": users_adoption,
            "sessions_adoption": sessions_adoption,
            "users_24h": x["users"],
            "sessions_24h": x["sessions"],
            "project_users_24h": total_users_count,
            "project_sessions_24h": total_sessions_count,
        }

    return rv
def find_histogram_buckets(field, params, conditions):
    """
    Expand a `histogram(column, num_buckets)` field into its concrete form.

    The maximum value of the column is looked up with a Snuba query and used to
    derive the third histogram argument, `ceil(max / num_buckets)`.

    field (str) The histogram function as written by the user.
    params (Dict[str, any]) Filtering parameters; start, end and project_id are read.
    conditions (Sequence[any]|None) Extra conditions for the max lookup. They are
                    copied, never modified; an `event.type = transaction` condition is
                    added to the copy when it is not already present.

    Returns the string `histogram(<column>, <num_buckets>, <bucket size>)`, or
    `histogram(<column>, 1, 1)` when the lookup does not return exactly one row.

    Raises InvalidSearchQuery when field is not a function, does not have exactly
    two arguments, targets a column other than transaction.duration, has a bucket
    count outside of 1..500, or when the maximum value found is 0.
    """
    match = is_function(field)
    if not match:
        raise InvalidSearchQuery(u"received {}, expected histogram function".format(field))

    columns = [c.strip() for c in match.group("columns").split(",") if len(c.strip()) > 0]

    if len(columns) != 2:
        raise InvalidSearchQuery(
            u"histogram(...) expects 2 column arguments, received {:g} arguments".format(
                len(columns)
            )
        )

    column = columns[0]
    # TODO evanh: This can be expanded to more fields at a later date, for now keep this limited.
    if column != "transaction.duration":
        raise InvalidSearchQuery(
            "histogram(...) can only be used with the transaction.duration column"
        )

    # Only a parse failure is caught here; the range check is explicit instead
    # of raising and immediately catching a generic Exception.
    try:
        num_buckets = int(columns[1])
    except ValueError:
        num_buckets = None
    if num_buckets is None or not 1 <= num_buckets <= 500:
        raise InvalidSearchQuery(
            u"histogram(...) requires a bucket value between 1 and 500, not {}".format(columns[1])
        )

    alias = u"max_{}".format(column)

    conditions = deepcopy(conditions) if conditions else []
    transaction_condition = ["event.type", "=", "transaction"]
    # Compare through a slice so conditions with fewer than three members
    # (e.g. OR groups) are skipped instead of raising IndexError.
    if not any(list(cond[:3]) == transaction_condition for cond in conditions):
        conditions.append(transaction_condition)
    translated_args, _ = resolve_discover_aliases({"conditions": conditions})

    results = raw_query(
        filter_keys={"project_id": params.get("project_id")},
        start=params.get("start"),
        end=params.get("end"),
        dataset=Dataset.Discover,
        conditions=translated_args["conditions"],
        aggregations=[["max", "duration", alias]],
    )
    if len(results["data"]) != 1:
        # If there are no transactions, so no max duration, return one empty bucket
        return "histogram({}, 1, 1)".format(column)

    bucket_max = results["data"][0][alias]
    if bucket_max == 0:
        raise InvalidSearchQuery(u"Cannot calculate histogram for {}".format(field))

    # Format as an exact integer: the "g" presentation type switches to lossy
    # scientific notation (e.g. 1.23457e+06) once the value reaches 1,000,000.
    bucket_size = int(ceil(bucket_max / float(num_buckets)))
    return "histogram({}, {:d}, {:d})".format(column, num_buckets, bucket_size)