def get_equation_list(self, organization: Organization, request: Request) -> Sequence[str]:
    """Equations have a prefix so that they can be easily included alongside our existing fields."""
    return [
        strip_equation(field)
        for field in request.GET.getlist("field")[:]
        if is_equation(field)
    ]
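
# A minimal sketch (not from the original module) of the equation helpers the
# method above relies on, assuming the convention is a simple "equation|"
# prefix; the prefix value and helper bodies here are assumptions for
# illustration only.
EQUATION_PREFIX = "equation|"


def is_equation(field: str) -> bool:
    # An equation travels in the same `field` query parameter as ordinary
    # columns, distinguished only by its prefix.
    return field.startswith(EQUATION_PREFIX)


def strip_equation(field: str) -> str:
    # Drop the prefix, leaving just the arithmetic expression.
    return field[len(EQUATION_PREFIX):]


# Example: ?field=count()&field=equation|count() / failure_count()
# -> get_equation_list returns ["count() / failure_count()"].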
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        "count()",
        "count_unique(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns
        if is_equation(column)
    ]

    # TODO: This adds all the possible fields to the query by default. However,
    # due to the way shards aggregate the rows, this can be slow. As an
    # optimization, allow the fields to be user specified to only get the
    # necessary aggregations.
    #
    # As part of the transition, continue to add all possible fields when it's
    # not specified, but this should be removed in the future.
    if not fields:
        for column in SPAN_PERFORMANCE_COLUMNS.values():
            for col in column.suspect_op_group_sort:
                if not col.startswith("equation["):
                    selected_columns.append(col)

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[direction + column for column in suspect_span_columns.suspect_op_group_sort],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    extra_conditions = []

    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            )
        )

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            )
        )

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_99"),
        )
        for suspect in results["data"]
    ]
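
# Hypothetical call site for the function above; the params dict shape, the
# "sumExclusiveTime" orderby key, and the span op values are illustrative
# assumptions. `direction` is the orderby prefix: "" for ascending, "-" for
# descending.
from datetime import datetime, timedelta

suspects = query_suspect_span_groups(
    params={
        "project_id": [1],
        "organization_id": 1,
        "start": datetime.utcnow() - timedelta(hours=24),
        "end": datetime.utcnow(),
    },
    fields=[],
    query="transaction:/api/0/projects/",
    span_ops=["db", "http.client"],
    span_groups=None,
    direction="-",
    orderby="sumExclusiveTime",
    limit=10,
    offset=0,
)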
def get_timeseries_snuba_filter(selected_columns, query, params):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery("Cannot get timeseries result without a start and end.")

    columns = []
    equations = []

    for column in selected_columns:
        if is_equation(column):
            equations.append(strip_equation(column))
        else:
            columns.append(column)

    if len(equations) > 0:
        resolved_equations, updated_columns = resolve_equation_list(
            equations, columns, aggregates_only=True, auto_add=True
        )
    else:
        resolved_equations = []
        updated_columns = columns

    # For the new apdex, we need to add project threshold config as a selected
    # column which means the group by for the time series won't work.
    # As a temporary solution, we will calculate the mean of all the project
    # level thresholds in the request and use the legacy apdex, user_misery
    # or count_miserable calculation.
    # TODO(snql): Alias the project_threshold_config column so it doesn't
    # have to be in the SELECT statement and group by to be able to use new apdex,
    # user_misery and count_miserable.
    threshold = None
    for agg in CONFIGURABLE_AGGREGATES:
        if agg not in updated_columns:
            continue

        if threshold is None:
            project_ids = params.get("project_id")
            threshold_configs = list(
                ProjectTransactionThreshold.objects.filter(
                    organization_id=params["organization_id"],
                    project_id__in=project_ids,
                ).values_list("threshold", flat=True)
            )

            projects_without_threshold = len(project_ids) - len(threshold_configs)
            threshold_configs.extend([DEFAULT_PROJECT_THRESHOLD] * projects_without_threshold)
            threshold = int(mean(threshold_configs))

        updated_columns.remove(agg)
        updated_columns.append(CONFIGURABLE_AGGREGATES[agg].format(threshold=threshold))

    snuba_filter.update_with(
        resolve_field_list(
            updated_columns, snuba_filter, auto_fields=False, resolved_equations=resolved_equations
        )
    )

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)

    if not snuba_filter.aggregations:
        raise InvalidSearchQuery("Cannot get timeseries result with no aggregation.")

    return snuba_filter, translated_columns
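
# Worked example of the threshold fallback above (values are illustrative,
# and DEFAULT_PROJECT_THRESHOLD is assumed to be 300ms here): two projects
# with configured thresholds plus one without collapse to a single legacy
# value via the mean.
from statistics import mean

threshold_configs = [300, 500]          # configured project thresholds (ms)
projects_without_threshold = 1          # this project falls back to the default
threshold_configs.extend([300] * projects_without_threshold)
threshold = int(mean(threshold_configs))  # int(mean([300, 500, 300])) == 366
# A template such as "apdex({threshold})" (a hypothetical
# CONFIGURABLE_AGGREGATES entry) would then format to "apdex(366)".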
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        # want a single event id to fetch from nodestore for the span description
        "any(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if is_equation(column)
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[direction + column for column in suspect_span_columns.suspect_op_group_sort],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    extra_conditions = []

    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            )
        )

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            )
        )

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            description=get_span_description(
                EventID(params["project_id"][0], suspect["any_id"]),
                span_op=suspect["array_join_spans_op"],
                span_group=suspect["array_join_spans_group"],
            ),
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_99"),
        )
        for suspect in results["data"]
    ]
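
# Sketch of the description lookup that distinguishes this variant from the
# earlier one: `any(id)` samples one event id per (op, group) row so a single
# nodestore fetch can recover a human-readable span description. The row
# values below are illustrative.
example_row = {
    "array_join_spans_op": "db",
    "array_join_spans_group": "ab1b87929b1f4de8",
    "any_id": "a" * 32,
}
description = get_span_description(
    EventID(1, example_row["any_id"]),  # (project_id, event_id), as used above
    span_op=example_row["array_join_spans_op"],
    span_group=example_row["array_join_spans_group"],
)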
def validate(self, data):
    organization = self.context["organization"]
    query_info = data["query_info"]

    # Validate the project field, if provided
    # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
    project_query = query_info.get("project")
    if project_query:
        get_projects_by_id = self.context["get_projects_by_id"]
        # Coerce the query into a set
        if isinstance(project_query, list):
            projects = get_projects_by_id(set(map(int, project_query)))
        else:
            projects = get_projects_by_id({int(project_query)})
        query_info["project"] = [project.id for project in projects]

    # Discover Pre-processing
    if data["query_type"] == ExportQueryType.DISCOVER_STR:
        # coerce the fields into a list as needed
        base_fields = query_info.get("field", [])
        if not isinstance(base_fields, list):
            base_fields = [base_fields]

        equations = []
        fields = []
        if self.context.get("has_arithmetic"):
            for field in base_fields:
                if is_equation(field):
                    equations.append(strip_equation(field))
                else:
                    fields.append(field)
        else:
            fields = base_fields

        if len(base_fields) > MAX_FIELDS:
            detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
            raise serializers.ValidationError(detail)
        elif len(base_fields) == 0:
            raise serializers.ValidationError("at least one field is required to export")

        if "query" not in query_info:
            detail = "query is required to export, please pass an empty string if you don't want to set one"
            raise serializers.ValidationError(detail)

        query_info["field"] = fields
        query_info["equations"] = equations

        if not query_info.get("project"):
            projects = self.context["get_projects"]()
            query_info["project"] = [project.id for project in projects]

        # make sure to fix the export start/end times to ensure consistent results
        try:
            start, end = get_date_range_from_params(query_info)
        except InvalidParams as e:
            sentry_sdk.set_tag("query.error_reason", "Invalid date params")
            raise serializers.ValidationError(str(e))

        if "statsPeriod" in query_info:
            del query_info["statsPeriod"]
        if "statsPeriodStart" in query_info:
            del query_info["statsPeriodStart"]
        if "statsPeriodEnd" in query_info:
            del query_info["statsPeriodEnd"]
        query_info["start"] = start.isoformat()
        query_info["end"] = end.isoformat()

        # validate the query string by trying to parse it
        processor = DiscoverProcessor(
            discover_query=query_info,
            organization_id=organization.id,
        )
        try:
            snuba_filter = get_filter(query_info["query"], processor.params)
            if len(equations) > 0:
                resolved_equations, _ = resolve_equation_list(equations, fields)
            else:
                resolved_equations = []
            resolve_field_list(
                fields.copy(),
                snuba_filter,
                auto_fields=True,
                auto_aggregations=True,
                resolved_equations=resolved_equations,
            )
        except InvalidSearchQuery as err:
            raise serializers.ValidationError(str(err))

    return data
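
# Hypothetical payload this serializer would accept (field names and values
# are illustrative). When the "has_arithmetic" context flag is set, equations
# ride along in `field` with the equation prefix and are split out into
# query_info["equations"] during validation.
data = {
    "query_type": ExportQueryType.DISCOVER_STR,
    "query_info": {
        "project": ["42"],
        "field": ["title", "count()", "equation|count() / 60"],
        "query": "",
        "statsPeriod": "24h",
    },
}
# After validate(): query_info["field"] == ["title", "count()"],
# query_info["equations"] == ["count() / 60"], and "statsPeriod" has been
# replaced by concrete ISO-8601 "start"/"end" timestamps.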