def transform_deprecated_functions_in_orderby(orderby):
    if not orderby:
        return

    orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
    new_orderby = []
    for order in orderby:
        has_negative = False
        column = order
        if order.startswith("-"):
            has_negative = True
            column = order.strip("-")

        new_column = column
        if column in OLD_FUNCTIONS_TO_NEW:
            new_column = get_function_alias(OLD_FUNCTIONS_TO_NEW[column])
        elif column.replace("()", "") in OLD_FUNCTIONS_TO_NEW:
            new_column = get_function_alias(OLD_FUNCTIONS_TO_NEW[column.replace("()", "")])

        if has_negative:
            new_column = "-" + new_column
        new_orderby.append(new_column)

    return new_orderby

def find_measurements_min_max(measurements, min_value, max_value, user_query, params):
    """
    Find the min/max value of the specified measurements. If either min/max is already
    specified, it will be used and not queried for.

    :param [str] measurements: The list of measurements to generate histograms for.
    :param float min_value: The minimum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param float max_value: The maximum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param str user_query: Filter query string to create conditions from.
    :param {str: str} params: Filtering parameters with start, end, project_id, environment
    """
    if min_value is not None and max_value is not None:
        return min_value, max_value

    min_columns, max_columns = [], []
    for measurement in measurements:
        if min_value is None:
            min_columns.append("min(measurements.{})".format(measurement))
        if max_value is None:
            max_columns.append("max(measurements.{})".format(measurement))

    results = query(
        selected_columns=min_columns + max_columns,
        query=user_query,
        params=params,
        limit=1,
        referrer="api.organization-events-measurements-min-max",
        auto_fields=True,
        use_aggregate_conditions=True,
    )

    data = results.get("data")

    # there should be exactly 1 row in the results, but if something went wrong here,
    # we force the min/max to be None to coerce an empty histogram
    if data is None or len(data) != 1:
        return None, None

    row = data[0]

    if min_value is None:
        min_values = [row[get_function_alias(column)] for column in min_columns]
        min_values = list(filter(lambda v: v is not None, min_values))
        min_value = min(min_values) if min_values else None

    if max_value is None:
        max_values = [row[get_function_alias(column)] for column in max_columns]
        max_values = list(filter(lambda v: v is not None, max_values))
        max_value = max(max_values) if max_values else None

    return min_value, max_value

def get(self, request, organization):
    """
    Find the event id with the closest value to an aggregate for a given query
    """
    if not self.has_feature(organization, request):
        return Response(status=404)

    with sentry_sdk.start_span(op="discover.endpoint", description="filter_params") as span:
        span.set_data("organization", organization)

        try:
            params = self.get_filter_params(request, organization)
        except NoProjects:
            return Response(status=404)
        params = self.quantize_date_params(request, params)

        # Assumption is that users will want the 50th percentile
        baseline_function = request.GET.get("baselineFunction", "p50()")
        # If the baseline was calculated already save ourselves a query
        baseline_value = request.GET.get("baselineValue")
        baseline_alias = get_function_alias(baseline_function)

    with self.handle_query_errors():
        if baseline_value is None:
            result = discover.query(
                selected_columns=[baseline_function],
                params=params,
                query=request.GET.get("query"),
                limit=1,
                referrer="api.transaction-baseline.get_value",
            )
            baseline_value = result["data"][0].get(baseline_alias) if "data" in result else None
            if baseline_value is None:
                return Response(status=404)

        delta_column = "absolute_delta(transaction.duration,{})".format(baseline_value)

        result = discover.query(
            selected_columns=[
                "project",
                "timestamp",
                "id",
                "transaction.duration",
                delta_column,
            ],
            # Find the most recent transaction that's closest to the baseline value
            # id as the last item for consistent results
            orderby=[get_function_alias(delta_column), "-timestamp", "id"],
            params=params,
            query=request.GET.get("query"),
            limit=1,
            referrer="api.transaction-baseline.get_id",
        )

        if len(result["data"]) == 0:
            return Response(status=404)

        baseline_data = result["data"][0]
        baseline_data[baseline_alias] = baseline_value
        return Response(baseline_data)

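# A minimal, self-contained sketch (not Sentry code) of the selection rule encoded by the
# orderby above: pick the event whose duration is closest to the baseline, break ties by
# recency, then by id. The 300ms baseline and the sample rows below are made up.
baseline = 300
events = [
    {"id": "a", "timestamp": 3, "duration": 290},
    {"id": "b", "timestamp": 5, "duration": 310},
    {"id": "c", "timestamp": 4, "duration": 310},
]

closest = min(events, key=lambda e: (abs(e["duration"] - baseline), -e["timestamp"], e["id"]))
print(closest["id"])  # "b": same 10ms delta as "a" and "c", but the most recent
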
def normalize_measurements_histogram(measurements, num_buckets, key_col, histogram_params, results):
    """
    Normalizes the histogram results by renaming the columns to key and bin and
    making sure to zerofill any missing values.

    :param [str] measurements: The list of measurements to generate histograms for.
    :param int num_buckets: The number of buckets the histogram should contain.
    :param str key_col: The column of the key name.
    :param HistogramParams histogram_params: The histogram parameters used.
    :param any results: The results from the histogram query that may be missing
        bins and needs to be normalized.
    """
    measurements = sorted(measurements)
    key_name = get_function_alias(key_col)
    bin_name = get_function_alias(get_measurements_histogram_col(histogram_params))

    # adjust the meta for the renamed columns
    meta = results["meta"]
    new_meta = []

    meta_map = {key_name: "key", bin_name: "bin"}
    for col in meta:
        new_meta.append({"type": col["type"], "name": meta_map.get(col["name"], col["name"])})

    results["meta"] = new_meta

    # zerofill and rename the columns while making sure to adjust for precision
    data = results["data"]
    new_data = []

    bucket_maps = {m: {} for m in measurements}
    for row in data:
        measurement = row[key_name]
        # we expect the bin to be an integer, this is because all floating
        # point values are rounded during the calculation
        bucket = int(row[bin_name])
        # ignore unexpected measurements
        if measurement in bucket_maps:
            bucket_maps[measurement][bucket] = row["count"]

    for i in range(num_buckets):
        bucket = histogram_params.start_offset + histogram_params.bucket_size * i
        for measurement in measurements:
            # we want to rename the columns here
            row = {
                "key": measurement,
                "bin": bucket,
                "count": bucket_maps[measurement].get(bucket, 0),
            }
            # make sure to adjust for the precision if necessary
            if histogram_params.multiplier > 1:
                row["bin"] /= float(histogram_params.multiplier)
            new_data.append(row)

    results["data"] = new_data

    return results

def normalize_histogram_results(fields, key_column, histogram_params, results, array_column):
    """
    Normalizes the histogram results by renaming the columns to key and bin and
    making sure to zerofill any missing values.

    :param [str] fields: The list of fields to generate histograms for.
    :param str key_column: The column of the key name.
    :param HistogramParams histogram_params: The histogram parameters used.
    :param any results: The results from the histogram query that may be missing
        bins and needs to be normalized.
    :param str array_column: Array column prefix
    """
    # `key_name` is only used when generating a multi histogram of measurement values.
    # It contains the name of the corresponding measurement for that row.
    key_name = None if key_column is None else get_function_alias(key_column)
    histogram_column = get_histogram_column(fields, key_column, histogram_params, array_column)
    bin_name = get_function_alias(histogram_column)

    # zerofill and rename the columns while making sure to adjust for precision
    bucket_maps = {field: {} for field in fields}
    for row in results["data"]:
        # Fall back to the first field name if there is no `key_name`,
        # otherwise, this is an array value name and format it as such.
        key = (
            fields[0]
            if key_name is None
            else f"{get_array_column_alias(array_column)}.{get_array_column_field(array_column, row[key_name])}"
        )
        # we expect the bin to be an integer, this is because all floating
        # point values are rounded during the calculation
        bucket = int(row[bin_name])
        # ignore unexpected keys
        if key in bucket_maps:
            bucket_maps[key][bucket] = row["count"]

    new_data = {field: [] for field in fields}
    for i in range(histogram_params.num_buckets):
        bucket = histogram_params.start_offset + histogram_params.bucket_size * i
        for field in fields:
            row = {
                "bin": bucket,
                "count": bucket_maps[field].get(bucket, 0),
            }
            # make sure to adjust for the precision if necessary
            if histogram_params.multiplier > 1:
                row["bin"] /= float(histogram_params.multiplier)
            new_data[field].append(row)

    return new_data

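# A minimal, self-contained sketch (not Sentry code) of the zerofill performed above:
# every expected bin is emitted, missing bins default to a count of 0, and the bin value
# is scaled back down by the precision multiplier. The parameters and counts are made up.
start_offset, bucket_size, num_buckets, multiplier = 0, 50, 4, 100
observed = {0: 3, 100: 7}  # counts keyed by (scaled) bin; bins 50 and 150 are missing

rows = []
for i in range(num_buckets):
    bucket = start_offset + bucket_size * i
    row = {"bin": bucket, "count": observed.get(bucket, 0)}
    if multiplier > 1:
        row["bin"] /= float(multiplier)  # e.g. precision=2 means values were stored x100
    rows.append(row)

print(rows)
# [{'bin': 0.0, 'count': 3}, {'bin': 0.5, 'count': 0}, {'bin': 1.0, 'count': 7}, {'bin': 1.5, 'count': 0}]
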
def transform_deprecated_functions_in_columns(columns):
    new_list = []
    translations = {}
    for column in columns:
        if column in OLD_FUNCTIONS_TO_NEW:
            new_column = OLD_FUNCTIONS_TO_NEW[column]
            translations[get_function_alias(new_column)] = column
            new_list.append(new_column)
        elif column.replace("()", "") in OLD_FUNCTIONS_TO_NEW:
            new_column = OLD_FUNCTIONS_TO_NEW[column.replace("()", "")]
            translations[get_function_alias(new_column)] = column.replace("()", "")
            new_list.append(new_column)
        else:
            new_list.append(column)

    return new_list, translations

def validate(self, data):
    if not data.get("id"):
        keys = set(data.keys())
        if self.required_for_create - keys:
            raise serializers.ValidationError(
                {
                    "fields": "fields are required during creation.",
                    "conditions": "conditions are required during creation.",
                }
            )

    # Validate the query that would be created when run.
    conditions = self._get_attr(data, "conditions", "")
    fields = self._get_attr(data, "fields", [])
    orderby = self._get_attr(data, "orderby", "")
    try:
        snuba_filter = get_filter(conditions)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError({"conditions": f"Invalid conditions: {err}"})
    if orderby:
        snuba_filter.orderby = get_function_alias(orderby)
    try:
        resolve_field_list(fields, snuba_filter)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError({"fields": f"Invalid fields: {err}"})

    return data

def serialize_multiple_axis(self, serializer, event_result, columns, query_columns):
    # Return with requested yAxis as the key
    return {
        column: serializer.serialize(event_result, get_function_alias(query_column))
        for column, query_column in zip(columns, query_columns)
    }

def get_event_stats_data(self, request, organization, get_event_stats, top_events=False):
    try:
        columns = request.GET.getlist("yAxis", ["count()"])
        query = request.GET.get("query")
        params = self.get_filter_params(request, organization)
        rollup = get_rollup_from_request(
            request,
            params,
            "1h",
            InvalidSearchQuery(
                "Your interval and date range would create too many results. "
                "Use a larger interval, or a smaller date range."
            ),
        )
        # Backwards compatibility for incidents which uses the old
        # column aliases as it straddles both versions of events/discover.
        # We will need these aliases until discover2 flags are enabled for all
        # users.
        column_map = {
            "user_count": "count_unique(user)",
            "event_count": "count()",
            "rpm()": "rpm(%d)" % rollup,
            "rps()": "rps(%d)" % rollup,
        }
        query_columns = [column_map.get(column, column) for column in columns]

        reference_event = self.reference_event(
            request, organization, params.get("start"), params.get("end")
        )
        result = get_event_stats(query_columns, query, params, rollup, reference_event)
    except (discover.InvalidSearchQuery, snuba.QueryOutsideRetentionError) as error:
        raise ParseError(detail=six.text_type(error))

    serializer = SnubaTSResultSerializer(organization, None, request.user)

    if top_events:
        results = {}
        for key, event_result in six.iteritems(result):
            if len(query_columns) > 1:
                results[key] = self.serialize_multiple_axis(
                    serializer, event_result, columns, query_columns
                )
            else:
                # Need to get function alias if count is a field, but not the axis
                results[key] = serializer.serialize(
                    event_result, get_function_alias(query_columns[0])
                )
        return results
    elif len(query_columns) > 1:
        return self.serialize_multiple_axis(serializer, result, columns, query_columns)
    else:
        return serializer.serialize(result)

def serialize_multiple_axis(self, serializer, event_result, columns, query_columns):
    # Return with requested yAxis as the key
    result = {
        columns[index]: serializer.serialize(
            event_result, get_function_alias(query_column), order=index
        )
        for index, query_column in enumerate(query_columns)
    }
    # Set order if multi-axis + top events
    if "order" in event_result.data:
        result["order"] = event_result.data["order"]

    return result

def __init__(self, organization_id, discover_query):
    self.projects = self.get_projects(organization_id, discover_query)
    self.start, self.end = get_date_range_from_params(discover_query)
    self.params = {
        "organization_id": organization_id,
        "project_id": [project.id for project in self.projects],
        "start": self.start,
        "end": self.end,
    }
    self.header_fields = map(lambda x: get_function_alias(x), discover_query["field"])
    self.data_fn = self.get_data_fn(
        fields=discover_query["field"], query=discover_query["query"], params=self.params
    )

def get(self, request, organization):
    if not features.has("organizations:discover-basic", organization, actor=request.user):
        return self.get_v1_results(request, organization)

    try:
        columns = request.GET.getlist("yAxis", ["count()"])
        params = self.get_filter_params(request, organization)
        rollup = self.get_rollup(request, params)
        # Backwards compatibility for incidents which uses the old
        # column aliases as it straddles both versions of events/discover.
        # We will need these aliases until discover2 flags are enabled for all
        # users.
        column_map = {
            "user_count": "count_unique(user)",
            "event_count": "count()",
            "rpm()": "rpm(%d)" % rollup,
            "rps()": "rps(%d)" % rollup,
        }
        query_columns = [column_map.get(column, column) for column in columns]

        result = discover.timeseries_query(
            selected_columns=query_columns,
            query=request.GET.get("query"),
            params=params,
            rollup=rollup,
            reference_event=self.reference_event(
                request, organization, params.get("start"), params.get("end")
            ),
            referrer="api.organization-event-stats",
        )
    except InvalidSearchQuery as err:
        raise ParseError(detail=six.text_type(err))

    serializer = SnubaTSResultSerializer(organization, None, request.user)
    if len(columns) > 1:
        # Return with requested yAxis as the key
        data = {
            column: serializer.serialize(result, get_function_alias(query_column))
            for column, query_column in zip(columns, query_columns)
        }
    else:
        data = serializer.serialize(result)
    return Response(data, status=200)

def __init__(self, organization_id, discover_query):
    self.projects = self.get_projects(organization_id, discover_query)
    self.environments = self.get_environments(organization_id, discover_query)
    self.start, self.end = get_date_range_from_params(discover_query)
    self.params = {
        "organization_id": organization_id,
        "project_id": [project.id for project in self.projects],
        "start": self.start,
        "end": self.end,
    }
    # make sure to only include environment if any are given
    # an empty list DOES NOT work
    if self.environments:
        self.params["environment"] = self.environments
    self.header_fields = map(lambda x: get_function_alias(x), discover_query["field"])
    self.data_fn = self.get_data_fn(
        fields=discover_query["field"], query=discover_query["query"], params=self.params
    )

def get_event_stats_data(self, request, organization, get_event_stats):
    try:
        columns = request.GET.getlist("yAxis", ["count()"])
        query = request.GET.get("query")
        params = self.get_filter_params(request, organization)
        rollup = get_rollup_from_request(
            request,
            params,
            "1h",
            InvalidSearchQuery(
                "Your interval and date range would create too many results. "
                "Use a larger interval, or a smaller date range."
            ),
        )
        # Backwards compatibility for incidents which uses the old
        # column aliases as it straddles both versions of events/discover.
        # We will need these aliases until discover2 flags are enabled for all
        # users.
        column_map = {
            "user_count": "count_unique(user)",
            "event_count": "count()",
            "rpm()": "rpm(%d)" % rollup,
            "rps()": "rps(%d)" % rollup,
        }
        query_columns = [column_map.get(column, column) for column in columns]

        reference_event = self.reference_event(
            request, organization, params.get("start"), params.get("end")
        )
        result = get_event_stats(query_columns, query, params, rollup, reference_event)
    except InvalidSearchQuery as err:
        raise ParseError(detail=six.text_type(err))

    serializer = SnubaTSResultSerializer(organization, None, request.user)
    if len(columns) > 1:
        # Return with requested yAxis as the key
        return {
            column: serializer.serialize(result, get_function_alias(query_column))
            for column, query_column in zip(columns, query_columns)
        }
    else:
        return serializer.serialize(result)

def validate(self, data):
    if not data.get("id"):
        keys = set(data.keys())
        if self.required_for_create - keys:
            raise serializers.ValidationError(
                {
                    "fields": "fields are required during creation.",
                    "conditions": "conditions are required during creation.",
                }
            )

    # Validate the query that would be created when run.
    conditions = self._get_attr(data, "conditions", "")
    fields = self._get_attr(data, "fields", []).copy()
    orderby = self._get_attr(data, "orderby", "")
    try:
        # When using the eps/epm functions, they require an interval argument
        # or to provide the start/end so that the interval can be computed.
        # This uses a hard coded start/end to ensure the validation succeeds
        # since the values themselves don't matter.
        params = {
            "start": datetime.now() - timedelta(days=1),
            "end": datetime.now(),
            "project_id": [p.id for p in self.context.get("projects")],
        }
        snuba_filter = get_filter(conditions, params=params)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError({"conditions": f"Invalid conditions: {err}"})
    if orderby:
        snuba_filter.orderby = get_function_alias(orderby)
    try:
        resolve_field_list(fields, snuba_filter)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError({"fields": f"Invalid fields: {err}"})

    return data

def find_histogram_min_max(fields, min_value, max_value, user_query, params, data_filter=None):
    """
    Find the min/max value of the specified fields. If either min/max is already
    specified, it will be used and not queried for.

    :param [str] fields: The list of fields to generate histograms for.
    :param float min_value: The minimum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param float max_value: The maximum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param str user_query: Filter query string to create conditions from.
    :param {str: str} params: Filtering parameters with start, end, project_id, environment
    :param str data_filter: Indicate the filter strategy to be applied to the data.
    """
    if min_value is not None and max_value is not None:
        return min_value, max_value

    min_columns = []
    max_columns = []
    quartiles = []
    for field in fields:
        if min_value is None:
            min_columns.append(f"min({field})")
        if max_value is None:
            max_columns.append(f"max({field})")
        if data_filter == "exclude_outliers":
            quartiles.append(f"percentile({field}, 0.25)")
            quartiles.append(f"percentile({field}, 0.75)")

    results = query(
        selected_columns=min_columns + max_columns + quartiles,
        query=user_query,
        params=params,
        limit=1,
        referrer="api.organization-events-histogram-min-max",
    )

    data = results.get("data")

    # there should be exactly 1 row in the results, but if something went wrong here,
    # we force the min/max to be None to coerce an empty histogram
    if data is None or len(data) != 1:
        return None, None

    row = data[0]

    if min_value is None:
        min_values = [row[get_function_alias(column)] for column in min_columns]
        min_values = list(filter(lambda v: v is not None, min_values))
        min_value = min(min_values) if min_values else None

    if max_value is None:
        max_values = [row[get_function_alias(column)] for column in max_columns]
        max_values = list(filter(lambda v: v is not None, max_values))
        max_value = max(max_values) if max_values else None

        fences = []
        if data_filter == "exclude_outliers":
            for field in fields:
                q1_alias = get_function_alias(f"percentile({field}, 0.25)")
                q3_alias = get_function_alias(f"percentile({field}, 0.75)")

                first_quartile = row[q1_alias]
                third_quartile = row[q3_alias]

                if (
                    first_quartile is None
                    or third_quartile is None
                    or math.isnan(first_quartile)
                    or math.isnan(third_quartile)
                ):
                    continue

                interquartile_range = abs(third_quartile - first_quartile)
                upper_outer_fence = third_quartile + 3 * interquartile_range
                fences.append(upper_outer_fence)

        max_fence_value = max(fences) if fences else None

        candidates = [max_fence_value, max_value]
        candidates = list(filter(lambda v: v is not None, candidates))
        max_value = min(candidates) if candidates else None

    return min_value, max_value

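# A minimal, self-contained sketch (not Sentry code) of the "exclude_outliers" strategy
# above: the histogram's upper bound is capped at the upper outer fence Q3 + 3 * IQR, so
# a handful of extreme values cannot stretch the bucket range. Sample durations are made up.
import statistics

durations = [120, 130, 135, 140, 150, 155, 160, 170, 180, 5000]  # 5000 is an outlier

q1, _, q3 = statistics.quantiles(durations, n=4)  # quartile cut points (Python 3.8+)
iqr = abs(q3 - q1)
upper_outer_fence = q3 + 3 * iqr

observed_max = max(durations)
histogram_max = min(upper_outer_fence, observed_max)

print(q1, q3, upper_outer_fence, histogram_max)  # the outlier no longer sets the max
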
def prepare_discover_query(
    selected_columns,
    query,
    params,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    with sentry_sdk.start_span(op="discover.discover", description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions.
            # Loop through to make sure any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                            )
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}.".format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        )
                    )
            else:
                found = any(
                    having_clause[0] == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                )
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}.".format(
                            having_clause[0],
                            error_extra,
                        )
                    )

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)

def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # We clobber this value throughout this code, so copy the value
    selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover", description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    function_translations = {}

    with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations
        )

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions.
            # Loop through to make sure any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                            )
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}.".format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        )
                    )
            else:
                found = any(
                    having_clause[0] == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                )
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}.".format(
                            having_clause[0],
                            error_extra,
                        )
                    )

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover", description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover", description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(
            result, resolved_fields["functions"], translated_columns, snuba_filter, selected_columns
        )

def get(self, request, organization):
    if not self.has_feature(organization, request):
        return Response(status=404)

    with sentry_sdk.start_span(op="discover.endpoint", description="parse params"):
        try:
            params = self.get_snuba_params(request, organization)
        except NoProjects:
            return Response([])

        vitals = [vital.lower() for vital in request.GET.getlist("vital", [])]
        if len(vitals) == 0:
            raise ParseError(detail="Need to pass at least one vital")

        selected_columns = []
        aliases = {}
        for vital in vitals:
            if vital not in self.VITALS:
                raise ParseError(detail=f"{vital} is not a valid vital")
            aliases[vital] = []
            for index, threshold in enumerate(self.VITALS[vital]["thresholds"]):
                column = f"count_at_least({vital}, {threshold})"
                # Order aliases for later calculation
                aliases[vital].append(get_function_alias(column))
                selected_columns.append(column)
            selected_columns.append(f"p75({vital})")

    with self.handle_query_errors():
        events_results = discover.query(
            selected_columns=selected_columns,
            query=request.GET.get("query"),
            params=params,
            # Results should only ever have 1 result
            limit=1,
            referrer="api.events.vitals",
            auto_fields=True,
            auto_aggregations=True,
            use_aggregate_conditions=True,
        )

    results = {}
    if len(events_results["data"]) == 1:
        event_data = events_results["data"][0]
        for vital in vitals:
            groups = len(aliases[vital])
            results[vital] = {}
            total = 0
            # Go backwards so that we can subtract and get the running total
            for i in range(groups - 1, -1, -1):
                count = event_data[aliases[vital][i]]
                group_count = 0 if count is None else count - total
                results[vital][self.LABELS[i]] = group_count
                total += group_count

            results[vital]["total"] = total
            results[vital]["p75"] = event_data.get(get_function_alias(f"p75({vital})"))

    return Response(results)

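# A minimal, self-contained sketch (not Sentry code) of the backwards loop above:
# count_at_least(vital, t) is cumulative, so walking the thresholds from largest to
# smallest and subtracting the running total yields per-bucket counts. The label names
# and sample numbers are made up for illustration.
labels = ["good", "meh", "poor"]   # one label per threshold, ordered like the aliases
count_at_least = [100, 30, 5]      # cumulative counts: >= 0, >= threshold1, >= threshold2

results = {}
total = 0
for i in range(len(count_at_least) - 1, -1, -1):
    count = count_at_least[i]
    group_count = 0 if count is None else count - total
    results[labels[i]] = group_count
    total += group_count

results["total"] = total
print(results)  # {'poor': 5, 'meh': 25, 'good': 70, 'total': 100}
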
def get_event_stats_data(
    self,
    request,
    organization,
    get_event_stats,
    top_events=False,
    query_column="count()",
    params=None,
    query=None,
):
    with self.handle_query_errors():
        with sentry_sdk.start_span(
            op="discover.endpoint", description="base.stats_query_creation"
        ):
            columns = request.GET.getlist("yAxis", [query_column])
            if query is None:
                query = request.GET.get("query")
            if params is None:
                try:
                    # events-stats is still used by events v1 which doesn't require global views
                    params = self.get_snuba_params(
                        request, organization, check_global_views=False
                    )
                except NoProjects:
                    return {"data": []}

            rollup = get_rollup_from_request(
                request,
                params,
                "1h",
                InvalidSearchQuery(
                    "Your interval and date range would create too many results. "
                    "Use a larger interval, or a smaller date range."
                ),
            )
            # Backwards compatibility for incidents which uses the old
            # column aliases as it straddles both versions of events/discover.
            # We will need these aliases until discover2 flags are enabled for all
            # users.
            column_map = {
                "user_count": "count_unique(user)",
                "event_count": "count()",
                "epm()": "epm(%d)" % rollup,
                "eps()": "eps(%d)" % rollup,
            }
            query_columns = [column_map.get(column, column) for column in columns]

        with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_query"):
            result = get_event_stats(query_columns, query, params, rollup)

    serializer = SnubaTSResultSerializer(organization, None, request.user)

    with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_serialization"):
        if top_events:
            results = {}
            for key, event_result in six.iteritems(result):
                if len(query_columns) > 1:
                    results[key] = self.serialize_multiple_axis(
                        serializer, event_result, columns, query_columns
                    )
                else:
                    # Need to get function alias if count is a field, but not the axis
                    results[key] = serializer.serialize(
                        event_result, column=get_function_alias(query_columns[0])
                    )
            return results
        elif len(query_columns) > 1:
            return self.serialize_multiple_axis(serializer, result, columns, query_columns)
        else:
            return serializer.serialize(result)

def histogram_query(
    fields,
    user_query,
    params,
    num_buckets,
    precision=0,
    min_value=None,
    max_value=None,
    data_filter=None,
    referrer=None,
):
    """
    API for generating histograms for numeric columns.

    A multihistogram is possible only if the columns are all array columns.
    Array columns are columns whose values are nested arrays.
    Measurements and span op breakdowns are examples of array columns.
    The resulting histograms will have their bins aligned.

    :param [str] fields: The list of fields to generate histograms for.
    :param str user_query: Filter query string to create conditions from.
    :param {str: str} params: Filtering parameters with start, end, project_id, environment
    :param int num_buckets: The number of buckets the histogram should contain.
    :param int precision: The number of decimal places to preserve, default 0.
    :param float min_value: The minimum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param float max_value: The maximum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param str data_filter: Indicate the filter strategy to be applied to the data.
    """
    multiplier = int(10**precision)
    if max_value is not None:
        # We want the specified max_value to be exclusive, and the queried max_value
        # to be inclusive. So we adjust the specified max_value using the multiplier.
        max_value -= 0.1 / multiplier
    min_value, max_value = find_histogram_min_max(
        fields, min_value, max_value, user_query, params, data_filter
    )

    key_column = None
    array_column = None
    histogram_function = None
    conditions = []
    if len(fields) > 1:
        array_column = check_multihistogram_fields(fields)
        if array_column == "measurements":
            key_column = "array_join(measurements_key)"
            histogram_function = get_measurement_name
        elif array_column == "span_op_breakdowns":
            key_column = "array_join(span_op_breakdowns_key)"
            histogram_function = get_span_op_breakdown_name
        else:
            raise InvalidSearchQuery(
                "multihistogram expected either all measurements or all breakdowns"
            )

        key_alias = get_function_alias(key_column)
        field_names = [histogram_function(field) for field in fields]
        conditions.append([key_alias, "IN", field_names])

    histogram_params = find_histogram_params(num_buckets, min_value, max_value, multiplier)
    histogram_column = get_histogram_column(fields, key_column, histogram_params, array_column)
    histogram_alias = get_function_alias(histogram_column)

    if min_value is None or max_value is None:
        return normalize_histogram_results(
            fields, key_column, histogram_params, {"data": []}, array_column
        )
    # make sure to bound the bins to get the desired range of results
    if min_value is not None:
        min_bin = histogram_params.start_offset
        conditions.append([histogram_alias, ">=", min_bin])
    if max_value is not None:
        max_bin = histogram_params.start_offset + histogram_params.bucket_size * num_buckets
        conditions.append([histogram_alias, "<=", max_bin])

    columns = [] if key_column is None else [key_column]
    results = query(
        selected_columns=columns + [histogram_column, "count()"],
        conditions=conditions,
        query=user_query,
        params=params,
        orderby=[histogram_alias],
        limit=len(fields) * num_buckets,
        referrer=referrer,
        functions_acl=["array_join", "histogram"],
    )

    return normalize_histogram_results(fields, key_column, histogram_params, results, array_column)

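# Hypothetical usage sketch for histogram_query, not part of the module above. It assumes
# a configured Sentry install where the helper is importable from `sentry.snuba.discover`.
# The measurement names, project id, date range, and referrer are invented for the example.
from datetime import datetime, timedelta

from sentry.snuba import discover

params = {
    "project_id": [1],  # made-up project id
    "start": datetime.utcnow() - timedelta(days=7),
    "end": datetime.utcnow(),
}
# Multihistogram over two array columns (measurements), 10 aligned buckets each,
# excluding outliers via the upper-outer-fence strategy described in find_histogram_min_max.
histograms = discover.histogram_query(
    fields=["measurements.lcp", "measurements.fcp"],
    user_query="event.type:transaction",
    params=params,
    num_buckets=10,
    precision=0,
    data_filter="exclude_outliers",
    referrer="example.histogram-usage",
)
# normalize_histogram_results returns {field: [{"bin": ..., "count": ...}, ...]}
print(histograms["measurements.lcp"][:3])
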
def measurements_histogram_query(
    measurements,
    user_query,
    params,
    num_buckets,
    precision=0,
    min_value=None,
    max_value=None,
    data_filter=None,
    referrer=None,
):
    """
    API for generating histograms specifically for measurements.

    This function allows you to generate histograms for multiple measurements
    at once. The resulting histograms will have their bins aligned.

    :param [str] measurements: The list of measurements to generate histograms for.
    :param str user_query: Filter query string to create conditions from.
    :param {str: str} params: Filtering parameters with start, end, project_id, environment
    :param int num_buckets: The number of buckets the histogram should contain.
    :param int precision: The number of decimal places to preserve, default 0.
    :param float min_value: The minimum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param float max_value: The maximum value allowed to be in the histogram.
        If left unspecified, it is queried using `user_query` and `params`.
    :param str data_filter: Indicate the filter strategy to be applied to the data.
    """
    multiplier = int(10**precision)
    if max_value is not None:
        # We want the specified max_value to be exclusive, and the queried max_value
        # to be inclusive. So we adjust the specified max_value using the multiplier.
        max_value -= 0.1 / multiplier
    min_value, max_value = find_measurements_min_max(
        measurements, min_value, max_value, user_query, params
    )

    key_col = "array_join(measurements_key)"
    histogram_params = find_measurements_histogram_params(
        num_buckets, min_value, max_value, multiplier
    )
    histogram_col = get_measurements_histogram_col(histogram_params)

    conditions = [[get_function_alias(key_col), "IN", measurements]]

    # make sure to bound the bins as to not get too many results
    if min_value is not None:
        min_bin = histogram_params.start_offset
        conditions.append([get_function_alias(histogram_col), ">=", min_bin])
    if max_value is not None:
        max_bin = histogram_params.start_offset + histogram_params.bucket_size * num_buckets
        conditions.append([get_function_alias(histogram_col), "<=", max_bin])

    results = query(
        selected_columns=[key_col, histogram_col, "count()"],
        conditions=conditions,
        query=user_query,
        params=params,
        # the zerofill step assumes it is ordered by the bin then name
        orderby=[get_function_alias(histogram_col), get_function_alias(key_col)],
        limit=len(measurements) * num_buckets,
        referrer=referrer,
        auto_fields=True,
        use_aggregate_conditions=True,
        functions_acl=["array_join", "measurements_histogram"],
    )

    return normalize_measurements_histogram(
        measurements, num_buckets, key_col, histogram_params, results
    )

def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, function_translations = transform_deprecated_functions_in_columns(
        selected_columns
    )
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)

    if not use_aggregate_conditions:
        snuba_filter.having = []

    # We need to run a separate query to be able to properly bucket the values for the histogram
    # Do that here, and format the bucket number into the columns before passing it through
    # to event search.
    idx = 0
    for col in selected_columns:
        if col.startswith("histogram("):
            histogram_column = find_histogram_buckets(col, params, snuba_filter.conditions)
            selected_columns[idx] = histogram_column
            function_translations[get_function_alias(histogram_column)] = get_function_alias(col)
            break
        idx += 1

    # Check to see if we are ordering by any functions and convert the orderby to be the correct alias.
    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        new_orderby = []
        for ordering in orderby:
            is_reversed = ordering.startswith("-")
            ordering = ordering.lstrip("-")
            for snuba_name, sentry_name in six.iteritems(function_translations):
                if sentry_name == ordering:
                    ordering = snuba_name
                    break

            ordering = "{}{}".format("-" if is_reversed else "", ordering)
            new_orderby.append(ordering)

        snuba_filter.orderby = new_orderby

    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter, auto_fields=auto_fields)
    )

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(
        snuba_filter, function_translations
    )

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_filter.having:
        found = any(
            having_clause[0] == agg_clause[-1] for agg_clause in snuba_filter.aggregations
        )
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column.".format(
                    having_clause[0]
                )
            )

    if conditions is not None:
        snuba_filter.conditions.extend(conditions)

    result = raw_query(
        start=snuba_filter.start,
        end=snuba_filter.end,
        groupby=snuba_filter.groupby,
        conditions=snuba_filter.conditions,
        aggregations=snuba_filter.aggregations,
        selected_columns=snuba_filter.selected_columns,
        filter_keys=snuba_filter.filter_keys,
        having=snuba_filter.having,
        orderby=snuba_filter.orderby,
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_filter, selected_columns)

def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")
    else:
        # We clobber this value throughout this code, so copy the value
        selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover", description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            snuba_filter.having = []

    # We need to run a separate query to be able to properly bucket the values for the histogram
    # Do that here, and format the bucket number into the columns before passing it through
    # to event search.
    idx = 0
    function_translations = {}
    for col in selected_columns:
        if col.startswith("histogram("):
            with sentry_sdk.start_span(
                op="discover.discover", description="query.histogram_calculation"
            ) as span:
                span.set_data("histogram", col)
                histogram_column = find_histogram_buckets(col, params, snuba_filter.conditions)
                selected_columns[idx] = histogram_column
                snuba_name = get_function_alias(histogram_column)
                sentry_name = get_function_alias(col)
                function_translations[snuba_name] = sentry_name
                # Since we're completely renaming the histogram function, we need to also check
                # if we are ordering by the histogram values, and change that.
                if orderby is not None:
                    orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
                    for i, ordering in enumerate(orderby):
                        if sentry_name == ordering.lstrip("-"):
                            ordering = "{}{}".format(
                                "-" if ordering.startswith("-") else "", snuba_name
                            )
                            orderby[i] = ordering

            break

        idx += 1

    with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        snuba_filter.update_with(
            resolve_field_list(selected_columns, snuba_filter, auto_fields=auto_fields)
        )

        if reference_event:
            ref_conditions = create_reference_event_conditions(reference_event)
            if ref_conditions:
                snuba_filter.conditions.extend(ref_conditions)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations
        )

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions.
            # Loop through to make sure any referenced functions are in the aggregations.
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        else:
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations
                            )
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        u"Aggregate(s) {} used in a condition but are not in the selected columns.".format(
                            ", ".join(conditions_not_in_aggregations)
                        )
                    )
            else:
                found = any(
                    having_clause[0] == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                )
                if not found:
                    raise InvalidSearchQuery(
                        u"Aggregate {} used in a condition but is not a selected column.".format(
                            having_clause[0]
                        )
                    )

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover", description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover", description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, translated_columns, snuba_filter, selected_columns)
