def wip_snql_query(
    selected_columns,
    query,
    params,
    equations=None,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    Replacement API for query using snql, this function is still a work in
    progress and is not ready for use in production
    """
    builder = QueryBuilder(
        Dataset.Discover,
        params,
        query=query,
        selected_columns=selected_columns,
        orderby=orderby,
        use_aggregate_conditions=use_aggregate_conditions,
        limit=limit,
    )

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, referrer)
    return results

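# Hedged usage sketch for wip_snql_query. The params dict shape mirrors the
# tests below; the org/project ids, date range, and referrer string here are
# illustrative assumptions, not values taken from this codebase.
from datetime import datetime, timedelta

example_params = {
    "organization_id": 1,  # assumed organization id
    "project_id": [1],  # assumed project ids
    "start": datetime.utcnow() - timedelta(days=1),
    "end": datetime.utcnow(),
}
example_results = wip_snql_query(
    selected_columns=["transaction", "count()"],
    query="event.type:transaction",
    params=example_params,
    orderby=["-count"],
    limit=10,
    referrer="api.example-referrer",  # hypothetical referrer string
)
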
def test_environment_param(self):
    self.params["environment"] = ["", "prod"]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
    self.assertCountEqual(
        query.where,
        [
            *self.default_conditions,
            Or(
                [
                    Condition(Column("environment"), Op.IS_NULL),
                    Condition(Column("environment"), Op.EQ, "prod"),
                ]
            ),
        ],
    )
    query.get_snql_query().validate()

    self.params["environment"] = ["dev", "prod"]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
    self.assertCountEqual(
        query.where,
        [
            *self.default_conditions,
            Condition(Column("environment"), Op.IN, ["dev", "prod"]),
        ],
    )
    query.get_snql_query().validate()

def test_turbo(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=["count()"],
        turbo=True,
    )
    assert query.turbo.value
    snql_query = query.get_snql_query()
    snql_query.validate()
    assert snql_query.turbo.value

def test_sample_rate(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=["count()"],
        sample_rate=0.1,
    )
    assert query.sample_rate == 0.1
    snql_query = query.get_snql_query()
    snql_query.validate()
    assert snql_query.match.sample == 0.1

def test_environment_filter(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "environment:prod",
        ["environment"],
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("environment"), Op.EQ, "prod"),
            *self.default_conditions,
        ],
    )
    query.get_snql_query().validate()

    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "environment:[dev, prod]",
        ["environment"],
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("environment"), Op.IN, ["dev", "prod"]),
            *self.default_conditions,
        ],
    )
    query.get_snql_query().validate()

def test_simple_orderby(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        selected_columns=["user.email", "release"],
        orderby=["user.email"],
    )
    self.assertCountEqual(query.where, self.default_conditions)
    self.assertCountEqual(
        query.orderby,
        [OrderBy(Function("toString", [Column("email")], "user.email"), Direction.ASC)],
    )
    query.get_snql_query().validate()

    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        selected_columns=["user.email", "release"],
        orderby=["-user.email"],
    )
    self.assertCountEqual(query.where, self.default_conditions)
    self.assertCountEqual(
        query.orderby,
        [OrderBy(Function("toString", [Column("email")], "user.email"), Direction.DESC)],
    )
    query.get_snql_query().validate()

def resolve_team_key_transaction_alias(
    builder: QueryBuilder, resolve_metric_index: bool = False
) -> SelectType:
    org_id = builder.params.get("organization_id")
    project_ids = builder.params.get("project_id")
    team_ids = builder.params.get("team_id")

    if org_id is None or team_ids is None or project_ids is None:
        raise TypeError("Team key transactions parameters cannot be None")

    team_key_transactions = list(
        TeamKeyTransaction.objects.filter(
            organization_id=org_id,
            project_team__in=ProjectTeam.objects.filter(
                project_id__in=project_ids, team_id__in=team_ids
            ),
        )
        .order_by("transaction", "project_team__project_id")
        .values_list("project_team__project_id", "transaction")
        .distinct("transaction", "project_team__project_id")[
            : fields.MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS
        ]
    )

    count = len(team_key_transactions)
    if resolve_metric_index:
        team_key_transactions = [
            (project, indexer.resolve(transaction))
            for project, transaction in team_key_transactions
        ]

    # NOTE: this raw count is not 100% accurate because if it exceeds
    # `MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS`, it will not be reflected
    sentry_sdk.set_tag("team_key_txns.count", count)
    sentry_sdk.set_tag(
        "team_key_txns.count.grouped", format_grouped_length(count, [10, 100, 250, 500])
    )

    if count == 0:
        return Function("toInt8", [0], constants.TEAM_KEY_TRANSACTION_ALIAS)

    return Function(
        "in",
        [
            (builder.column("project_id"), builder.column("transaction")),
            team_key_transactions,
        ],
        constants.TEAM_KEY_TRANSACTION_ALIAS,
    )

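# Hedged sketch of the expression this resolver returns when suspects exist.
# The (project_id, transaction) pairs are illustrative assumptions; the real
# pairs come from the TeamKeyTransaction query above.
example_team_key_expr = Function(
    "in",
    [
        (Column("project_id"), Column("transaction")),
        [(1, "/checkout"), (2, "/login")],  # hypothetical pairs
    ],
    constants.TEAM_KEY_TRANSACTION_ALIAS,
)
# The aliased column evaluates to 1 for team key transactions and 0 otherwise,
# which is what team_key_transaction_filter below compares against.
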
def test_spans_columns(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "array_join(spans_op)",
            "array_join(spans_group)",
            "sumArray(spans_exclusive_time)",
        ],
        functions_acl=["array_join", "sumArray"],
    )
    self.assertCountEqual(
        query.columns,
        [
            Function("arrayJoin", [Column("spans.op")], "array_join_spans_op"),
            Function("arrayJoin", [Column("spans.group")], "array_join_spans_group"),
            Function(
                "sum",
                [Function("arrayJoin", [Column("spans.exclusive_time")])],
                "sumArray_spans_exclusive_time",
            ),
        ],
    )

def release_filter_converter(builder: QueryBuilder, search_filter: SearchFilter) -> WhereType:
    """Parse releases for potential aliases like `latest`"""
    if search_filter.value.is_wildcard():
        operator = search_filter.operator
        value = search_filter.value
    else:
        operator_conversions = {"=": "IN", "!=": "NOT IN"}
        operator = operator_conversions.get(search_filter.operator, search_filter.operator)
        value = SearchValue(
            reduce(
                lambda x, y: x + y,
                [
                    parse_release(
                        v,
                        builder.params["project_id"],
                        builder.params.get("environment_objects"),
                        builder.params.get("organization_id"),
                    )
                    for v in to_list(search_filter.value.value)
                ],
                [],
            )
        )

    return builder._default_filter_converter(SearchFilter(search_filter.key, operator, value))

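# Hedged example of the conversion: assuming `parse_release` resolves the
# alias `latest` to version "1.1.0" for the selected projects (versions here
# are hypothetical), a filter of
#     release:latest    i.e. SearchFilter(SearchKey("release"), "=", SearchValue("latest"))
# is rewritten (operator "=" -> "IN") to
#     SearchFilter(SearchKey("release"), "IN", SearchValue(["1.1.0"]))
# before being handed to builder._default_filter_converter.
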
def test_count_if_with_tags(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "count_if(foo,equals,bar)",
            'count_if(foo,notEquals,"baz")',
        ],
    )
    self.assertCountEqual(query.where, self.default_conditions)
    self.assertCountEqual(
        query.aggregates,
        [
            Function(
                "countIf",
                [Function("equals", [Column("tags[foo]"), "bar"])],
                "count_if_foo_equals_bar",
            ),
            Function(
                "countIf",
                [Function("notEquals", [Column("tags[foo]"), "baz"])],
                "count_if_foo_notEquals__baz",
            ),
        ],
    )

def test_count_if(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "count_if(event.type,equals,transaction)",
            'count_if(event.type,notEquals,"transaction")',
        ],
    )
    self.assertCountEqual(query.where, self.default_conditions)
    self.assertCountEqual(
        query.aggregates,
        [
            Function(
                "countIf",
                [Function("equals", [Column("type"), "transaction"])],
                "count_if_event_type_equals_transaction",
            ),
            Function(
                "countIf",
                [Function("notEquals", [Column("type"), "transaction"])],
                "count_if_event_type_notEquals__transaction",
            ),
        ],
    )

def test_project_alias_column_with_project_condition(self):
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    query = QueryBuilder(
        Dataset.Discover, self.params, f"project:{project1.slug}", selected_columns=["project"]
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("project_id"), Op.EQ, project1.id),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ],
    )
    # Because of the condition on project there should only be 1 project in the transform
    self.assertCountEqual(
        query.select,
        [
            Function(
                "transform",
                [
                    Column("project_id"),
                    [project1.id],
                    [project1.slug],
                    "",
                ],
                "project",
            )
        ],
    )

def test_project_alias_column(self):
    # TODO(snql-boolean): Update this to match the corresponding test in test_filter
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["project"])
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("project_id"), Op.IN, [project1.id, project2.id]),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ],
    )
    self.assertCountEqual(
        query.select,
        [
            Function(
                "transform",
                [
                    Column("project_id"),
                    [project1.id, project2.id],
                    [project1.slug, project2.slug],
                    "",
                ],
                "project",
            )
        ],
    )

def test_retention(self):
    with self.options({"system.event-retention-days": 10}):
        with self.assertRaises(QueryOutsideRetentionError):
            QueryBuilder(
                Dataset.Discover,
                self.params,
                "",
                selected_columns=[],
            )

def data_fn(offset: int, limit: int) -> Any:
    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=["spans_op", "count()"],
        array_join="spans_op",
        query=query,
        limit=limit,
        offset=offset,
        orderby="-count",
    )
    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-span-ops")
    return [SpanOp(op=row["spans_op"], count=row["count"]) for row in results["data"]]

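# Hedged usage sketch: data_fn is shaped for offset-based pagination, e.g.
# handed to a paginator-style helper that calls it with successive
# (offset, limit) pairs. The page size here is an illustrative assumption.
first_page = data_fn(offset=0, limit=25)
next_page = data_fn(offset=25, limit=25)
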
def test_array_combinator_is_private(self):
    with self.assertRaisesRegexp(InvalidSearchQuery, "sum: no access to private function"):
        QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=["sumArray(measurements_value)"],
        )

def test_array_combinator_with_non_array_arg(self):
    with self.assertRaisesRegexp(InvalidSearchQuery, "stuff is not a valid array column"):
        QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=["sumArray(stuff)"],
            functions_acl=["sumArray"],
        )

def test_array_join_clause(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "spans_op",
            "count()",
        ],
        array_join="spans_op",
    )
    self.assertCountEqual(
        query.columns,
        [
            AliasedExpression(Column("spans.op"), "spans_op"),
            Function("count", [], "count"),
        ],
    )
    assert query.array_join == Column("spans.op")
    query.get_snql_query().validate()

def test_orderby_duplicate_columns(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        selected_columns=["user.email", "user.email"],
        orderby=["user.email"],
    )
    self.assertCountEqual(
        query.orderby,
        [OrderBy(Column("email"), Direction.ASC)],
    )

def test_simple_limitby(self):
    query = QueryBuilder(
        dataset=Dataset.Discover,
        params=self.params,
        query="",
        selected_columns=["message"],
        orderby="message",
        limitby=("message", 1),
        limit=4,
    )
    assert query.limitby == LimitBy(Column("message"), 1)

def test_simple_query(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "user.email:[email protected] release:1.2.1",
        ["user.email", "release"],
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("email"), Op.EQ, "[email protected]"),
            Condition(Column("release"), Op.EQ, "1.2.1"),
            *self.default_conditions,
        ],
    )
    self.assertCountEqual(
        query.select,
        [
            Function("toString", [Column("email")], "user.email"),
            Column("release"),
        ],
    )
    query.get_snql_query().validate()

def team_key_transaction_filter(builder: QueryBuilder, search_filter: SearchFilter) -> WhereType:
    value = search_filter.value.value
    key_transaction_expr = builder.resolve_field_alias(constants.TEAM_KEY_TRANSACTION_ALIAS)

    if search_filter.value.raw_value == "":
        return Condition(
            key_transaction_expr, Op.NEQ if search_filter.operator == "!=" else Op.EQ, 0
        )
    if value in ("1", 1):
        return Condition(key_transaction_expr, Op.EQ, 1)
    if value in ("0", 0):
        return Condition(key_transaction_expr, Op.EQ, 0)

    raise InvalidSearchQuery(
        "Invalid value for key_transaction condition. Accepted values are 1, 0"
    )

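# Hedged mapping sketch, where <expr> stands for the resolved
# TEAM_KEY_TRANSACTION_ALIAS expression and the left-hand queries are
# illustrative (assuming Sentry's usual parsing of has: into a != "" filter):
#     team_key_transaction:1     -> Condition(<expr>, Op.EQ, 1)
#     team_key_transaction:0     -> Condition(<expr>, Op.EQ, 0)
#     has:team_key_transaction   -> Condition(<expr>, Op.NEQ, 0)
#     !has:team_key_transaction  -> Condition(<expr>, Op.EQ, 0)
# Any other value raises InvalidSearchQuery.
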
def test_project_in_condition_filters_not_in_project_filter(self):
    # TODO(snql-boolean): Update this to match the corresponding test in test_filter
    project1 = self.create_project()
    project2 = self.create_project()
    # params is assumed to be validated at this point, so this query should be invalid
    self.params["project_id"] = [project2.id]
    with self.assertRaisesRegexp(
        InvalidSearchQuery,
        re.escape(
            f"Invalid query. Project(s) {str(project1.slug)} do not exist or are not actively selected."
        ),
    ):
        QueryBuilder(
            Dataset.Discover,
            self.params,
            f"project:{project1.slug}",
            selected_columns=["environment"],
        )

def test_array_combinator(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=["sumArray(measurements_value)"],
        functions_acl=["sumArray"],
    )
    self.assertCountEqual(
        query.columns,
        [
            Function(
                "sum",
                [Function("arrayJoin", [Column("measurements.value")])],
                "sumArray_measurements_value",
            )
        ],
    )

def test_array_join(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=["array_join(measurements_key)", "count()"],
        functions_acl=["array_join"],
    )
    array_join_column = Function(
        "arrayJoin",
        [Column("measurements.key")],
        "array_join_measurements_key",
    )
    self.assertCountEqual(query.columns, [array_join_column, Function("count", [], "count")])
    # make sure the array join columns are present in the groupby
    self.assertCountEqual(query.groupby, [array_join_column])

def test_project_in_condition_filters(self):
    # TODO(snql-boolean): Update this to match the corresponding test in test_filter
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        f"project:{project1.slug}",
        selected_columns=["environment"],
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("project_id"), Op.EQ, project1.id),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ],
    )

def project_slug_converter(
    builder: QueryBuilder, search_filter: SearchFilter
) -> Optional[WhereType]:
    """Convert project slugs to ids and create a filter based on those.

    This is because we only store project ids in clickhouse.
    """
    value = search_filter.value.value

    if Op(search_filter.operator) == Op.EQ and value == "":
        raise InvalidSearchQuery(
            'Cannot query for has:project or project:"" as every event will have a project'
        )

    slugs = to_list(value)
    project_slugs: Mapping[str, int] = {
        slug: project_id
        for slug, project_id in builder.project_slugs.items()
        if slug in slugs
    }
    missing: List[str] = [slug for slug in slugs if slug not in project_slugs]
    if missing and search_filter.operator in constants.EQUALITY_OPERATORS:
        raise InvalidSearchQuery(
            f"Invalid query. Project(s) {', '.join(missing)} do not exist or are not actively selected."
        )
    # Sorted for consistent query results
    project_ids = sorted(project_slugs.values())

    if project_ids:
        # Create a new search filter with the correct values
        converted_filter = builder.convert_search_filter_to_condition(
            SearchFilter(
                SearchKey("project.id"),
                search_filter.operator,
                SearchValue(project_ids if search_filter.is_in_filter else project_ids[0]),
            )
        )
        if converted_filter:
            if search_filter.operator in constants.EQUALITY_OPERATORS:
                builder.projects_to_filter.update(project_ids)
            return converted_filter

    return None

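# Hedged example: with selected projects {"backend": 1, "frontend": 2}
# (hypothetical slugs and ids), a `project:backend` filter converts to
#     Condition(Column("project_id"), Op.EQ, 1)
# and `project:[backend, frontend]` converts to
#     Condition(Column("project_id"), Op.IN, [1, 2])
# matching the behavior exercised by the project filter tests above.
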
def resolve_project_slug_alias(builder: QueryBuilder, alias: str) -> SelectType:
    project_ids = {
        project_id
        for project_id in builder.params.get("project_id", [])
        if isinstance(project_id, int)
    }

    # Try to reduce the size of the transform by using any existing conditions on projects
    # Do not optimize projects list if conditions contain OR operator
    if not builder.has_or_condition and len(builder.projects_to_filter) > 0:
        project_ids &= builder.projects_to_filter

    projects = Project.objects.filter(id__in=project_ids).values("slug", "id")

    return Function(
        "transform",
        [
            builder.column("project.id"),
            [project["id"] for project in projects],
            [project["slug"] for project in projects],
            "",
        ],
        alias,
    )

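# Hedged sketch of the resolved expression (the ids and slugs below are
# illustrative assumptions): `transform` maps each project id to its slug,
# with "" as the fallback for ids outside the list.
example_project_alias = Function(
    "transform",
    [
        Column("project_id"),
        [1, 2],  # hypothetical project ids
        ["backend", "frontend"],  # their slugs
        "",  # fallback value
    ],
    "project",
)
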
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    spans: List[Span],
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    # there aren't any suspects, early return to save an empty query
    if not spans or per_suspect == 0:
        return {}

    orderby_columns = SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_sort

    selected_columns: List[str] = [
        "id",
        "project.id",
        "project",
        "array_join(spans_op)",
        "array_join(spans_group)",
        *orderby_columns,
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        query=query,
        orderby=[direction + column for column in orderby_columns],
        # we want only `per_suspect` examples for each suspect
        limit=len(spans) * per_suspect,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    # we are only interested in the specific op, group pairs from the suspects
    builder.add_conditions(
        [
            Condition(
                Function(
                    "tuple",
                    [
                        builder.resolve_function("array_join(spans_op)"),
                        builder.resolve_function("array_join(spans_group)"),
                    ],
                ),
                Op.IN,
                Function(
                    "tuple",
                    [Function("tuple", [suspect.op, suspect.group]) for suspect in spans],
                ),
            ),
        ]
    )

    if len(spans) > 1:
        # Hack: the limit by clause only allows columns but here we want to
        # do a limitby on the two array joins. For the time being, directly
        # do the limitby on the internal snuba name for the span group column,
        # but this should not be relied upon in production, and if two spans
        # differ only by the span op, this will result in an incorrect query
        builder.limitby = LimitBy(Column("_snuba_array_join_spans_group"), per_suspect)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-examples")

    examples: Dict[Span, List[EventID]] = {
        Span(suspect.op, suspect.group): [] for suspect in spans
    }

    for example in results["data"]:
        key = Span(example["array_join_spans_op"], example["array_join_spans_group"])
        value = EventID(example["project.id"], example["project"], example["id"])
        examples[key].append(value)

    return examples

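# Hedged call sketch. The orderby key must be a key of
# SPAN_PERFORMANCE_COLUMNS; "sumExclusiveTime", the span op, and the group
# hash below are illustrative assumptions:
#
#     examples = query_example_transactions(
#         params=params,
#         query=None,
#         direction="-",
#         orderby="sumExclusiveTime",
#         spans=[Span("db", "ab1ab1ab1ab1ab1f")],  # hypothetical (op, group)
#         per_suspect=3,
#     )
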
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        "count()",
        "count_unique(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns
        if is_equation(column)
    ]

    # TODO: This adds all the possible fields to the query by default. However,
    # due to the way shards aggregate the rows, this can be slow. As an
    # optimization, allow the fields to be user specified to only get the
    # necessary aggregations.
    #
    # As part of the transition, continue to add all possible fields when it's
    # not specified, but this should be removed in the future.
    if not fields:
        for column in SPAN_PERFORMANCE_COLUMNS.values():
            for col in column.suspect_op_group_sort:
                if not col.startswith("equation["):
                    selected_columns.append(col)

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[direction + column for column in suspect_span_columns.suspect_op_group_sort],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    extra_conditions = []

    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            )
        )

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            )
        )

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_99"),
        )
        for suspect in results["data"]
    ]

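# Hedged call sketch. As above, the orderby key and span op values are
# illustrative assumptions, not values confirmed by this excerpt:
#
#     suspects = query_suspect_span_groups(
#         params=params,
#         fields=[],  # empty -> all possible fields are added, per the TODO above
#         query=None,
#         span_ops=["db", "http.client"],
#         span_groups=None,
#         direction="-",
#         orderby="sumExclusiveTime",
#         limit=10,
#         offset=0,
#     )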