def test_count_if_with_tags(self):
    """count_if on an unknown field falls back to a tags[...] column lookup."""
    builder = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "count_if(foo,equals,bar)",
            'count_if(foo,notEquals,"baz")',
        ],
    )
    self.assertCountEqual(builder.where, self.default_conditions)

    # Both aliases mangle the raw arguments; quoted values gain a double
    # underscore in the alias.
    expected_aggregates = [
        Function(
            "countIf",
            [Function("equals", [Column("tags[foo]"), "bar"])],
            "count_if_foo_equals_bar",
        ),
        Function(
            "countIf",
            [Function("notEquals", [Column("tags[foo]"), "baz"])],
            "count_if_foo_notEquals__baz",
        ),
    ]
    self.assertCountEqual(builder.aggregates, expected_aggregates)
def test_count_if(self):
    """count_if on a known field resolves to countIf over the snuba column."""
    builder = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "count_if(event.type,equals,transaction)",
            'count_if(event.type,notEquals,"transaction")',
        ],
    )
    self.assertCountEqual(builder.where, self.default_conditions)

    expected_aggregates = [
        Function(
            "countIf",
            [Function("equals", [Column("type"), "transaction"])],
            "count_if_event_type_equals_transaction",
        ),
        Function(
            "countIf",
            [Function("notEquals", [Column("type"), "transaction"])],
            "count_if_event_type_notEquals__transaction",
        ),
    ]
    self.assertCountEqual(builder.aggregates, expected_aggregates)
def test_sub_query(self) -> None:
    """An aggregate inner query can serve as the FROM of an outer query."""
    inner_conditions = [
        Condition(Column("type"), Op.NEQ, "transaction"),
        Condition(Column("project_id"), Op.EQ, self.project_id),
        Condition(Column("timestamp"), Op.GTE, self.base_time),
        Condition(Column("timestamp"), Op.LT, self.next_time),
    ]
    # Build the inner per-(project, tag) count query step by step; each
    # set_* call returns a new Query, so reassign.
    inner_query = Query("discover", Entity("discover_events"))
    inner_query = inner_query.set_select([Function("count", [], "count")])
    inner_query = inner_query.set_groupby(
        [Column("project_id"), Column("tags[custom_tag]")]
    )
    inner_query = inner_query.set_where(inner_conditions)

    # Outer query averages the inner counts.
    outer_query = Query("discover", inner_query)
    outer_query = outer_query.set_select(
        [Function("avg", [Column("count")], "avg_count")]
    )
    outer_query = outer_query.set_orderby(
        [OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)]
    )
    outer_query = outer_query.set_limit(1000)

    response = self.post("/discover/snql", data=outer_query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert data["data"] == [{"avg_count": 1.0}]
def test_simple_orderby(self):
    """orderby accepts both ascending ("field") and descending ("-field")."""
    cases = (
        ("user.email", Direction.ASC),
        ("-user.email", Direction.DESC),
    )
    for raw_orderby, direction in cases:
        builder = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "release"],
            orderby=[raw_orderby],
        )
        self.assertCountEqual(builder.where, self.default_conditions)
        # user.email is string-typed, so it is aliased via toString.
        self.assertCountEqual(
            builder.orderby,
            [OrderBy(Function("toString", [Column("email")], "user.email"), direction)],
        )
        builder.get_snql_query().validate()
def resolve_span_function(self, function: str, span: Span, alias: str):
    """Build an aggregate over spans_exclusive_time restricted to one span.

    The spans_* columns are parallel arrays, so arrayMap builds a per-index
    mask of "op matches AND group matches", and arrayReduce applies the -If
    combinator form of ``function`` using that mask.

    :param function: base aggregate name (e.g. "sum"); "{function}If" is used.
    :param span: span whose op/group select the matching array entries.
    :param alias: alias for the resulting expression.
    """
    op = span.op
    group = span.group
    return Function(
        "arrayReduce",
        [
            # -If combinator: only aggregate entries where the mask is 1.
            f"{function}If",
            self.column("spans_exclusive_time"),
            Function(
                "arrayMap",
                [
                    # x iterates spans_op, y iterates spans_group in lockstep.
                    Lambda(
                        ["x", "y"],
                        Function(
                            "and",
                            [
                                Function("equals", [Identifier("x"), op]),
                                Function("equals", [Identifier("y"), group]),
                            ],
                        ),
                    ),
                    self.column("spans_op"),
                    self.column("spans_group"),
                ],
            ),
        ],
        alias,
    )
def get_event_stats(
    query_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    zerofill_results: bool,
    comparison_delta: Optional[datetime] = None,
) -> SnubaTSResult:
    """Run the span-performance timeseries query and zerofill the result.

    NOTE(review): ``span`` is read from the enclosing scope, not passed in,
    and ``zerofill_results``/``comparison_delta`` are unused in this body —
    presumably the signature matches a get_event_stats callback contract;
    confirm at the call site.
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="timeseries.filter_transform"
    ):
        builder = TimeseriesQueryBuilder(
            Dataset.Discover,
            params,
            rollup,
            query=query,
            selected_columns=query_columns,
            functions_acl=["array_join", "percentileArray", "sumArray"],
        )
        span_op_column = builder.resolve_function("array_join(spans_op)")
        span_group_column = builder.resolve_function("array_join(spans_group)")

        # Adding spans.op and spans.group to the group by because
        # We need them in the query to help the array join optimizer
        # in snuba take effect but the TimeseriesQueryBuilder
        # removes all non aggregates from the select clause.
        builder.groupby.extend([span_op_column, span_group_column])

        # Restrict the array join to the single (op, group) pair of interest.
        builder.add_conditions(
            [
                Condition(
                    Function("tuple", [span_op_column, span_group_column]),
                    Op.IN,
                    Function("tuple", [Function("tuple", [span.op, span.group])]),
                ),
            ]
        )

        snql_query = builder.get_snql_query()
        results = raw_snql_query(
            snql_query, "api.organization-events-spans-performance-stats"
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="timeseries.transform_results"
    ):
        # Fill missing time buckets with zeros so the series is contiguous.
        result = discover.zerofill(
            results["data"],
            params["start"],
            params["end"],
            rollup,
            "time",
        )

    return SnubaTSResult({"data": result}, params["start"], params["end"], rollup)
def test_spans_columns(self):
    """spans_* fields resolve to arrayJoin-wrapped snuba columns."""
    builder = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "array_join(spans_op)",
            "array_join(spans_group)",
            "sumArray(spans_exclusive_time)",
        ],
        functions_acl=["array_join", "sumArray"],
    )
    expected_columns = [
        Function("arrayJoin", [Column("spans.op")], "array_join_spans_op"),
        Function("arrayJoin", [Column("spans.group")], "array_join_spans_group"),
        # The Array combinator sums over the flattened array elements.
        Function(
            "sum",
            [Function("arrayJoin", [Column("spans.exclusive_time")])],
            "sumArray_spans_exclusive_time",
        ),
    ]
    self.assertCountEqual(builder.columns, expected_columns)
def test_sort_count(self, mock_raw_snql_query):
    # Request the suspects endpoint with an explicit `-count` sort and
    # verify both the response ordering and the generated snql orderby.
    event = self.create_event()
    mock_raw_snql_query.side_effect = [
        {
            "data": [
                self.suspect_span_group_snuba_results("django.view", event),
                self.suspect_span_group_snuba_results("django.middleware", event),
                self.suspect_span_group_snuba_results("http.server", event),
            ],
        },
    ]
    with self.feature(self.FEATURES):
        response = self.client.get(
            self.url,
            data={
                "project": self.project.id,
                "sort": "-count",
            },
            format="json",
        )
    assert response.status_code == 200, response.content
    # Response rows come back in the mocked (already sorted) order.
    self.assert_suspect_span(
        response.data,
        [
            self.suspect_span_results("django.view", event),
            self.suspect_span_results("django.middleware", event),
            self.suspect_span_results("http.server", event),
        ],
    )
    assert mock_raw_snql_query.call_count == 1
    # the first call is the get the suspects, and should be using the specified sort
    assert mock_raw_snql_query.call_args_list[0][0][0].orderby == [
        OrderBy(exp=Function("count", [], "count"), direction=Direction.DESC),
        # Secondary sort on total exclusive time.
        OrderBy(
            exp=Function(
                "sum",
                [Function("arrayJoin", [Column("spans.exclusive_time")])],
                "sumArray_spans_exclusive_time",
            ),
            direction=Direction.DESC,
        ),
    ]
    # The referrer passed as the second positional argument.
    assert (
        mock_raw_snql_query.call_args_list[0][0][1]
        == "api.organization-events-spans-performance-suspects"
    )
def test_invalid_subquery() -> None:
    # A subquery used as the match must itself have a select clause; both
    # construction paths below must raise during validation.
    with pytest.raises(
        InvalidQueryError,
        match=re.escape(
            "inner query is invalid: query must have at least one expression in select"
        ),
    ):
        # Inner query constructed with no select at all.
        Query("discover", Query(dataset="discover", match=Entity("events"))).set_select(
            [Column("event_id"), Column("title")]
        )
    with pytest.raises(
        InvalidQueryError,
        match=re.escape(
            "inner query is invalid: query must have at least one expression in select"
        ),
    ):
        # A valid inner query replaced via set_match with one lacking a select.
        Query(
            "discover",
            Query(
                dataset="discover",
                match=Entity("events"),
                select=[Column("title"), Column("timestamp")],
            ),
        ).set_match(Query(dataset="discover", match=Entity("events"))).set_select(
            [Function("uniq", [Column("new_event")], "uniq_event"), Column("title")]
        )
def test_project_alias_column_with_project_condition(self):
    """A project:<slug> filter narrows both the where clause and the transform."""
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    builder = QueryBuilder(
        Dataset.Discover, self.params, f"project:{project1.slug}", selected_columns=["project"]
    )

    expected_where = [
        Condition(Column("project_id"), Op.EQ, project1.id),
        Condition(Column("timestamp"), Op.GTE, self.start),
        Condition(Column("timestamp"), Op.LT, self.end),
    ]
    self.assertCountEqual(builder.where, expected_where)

    # Because of the condition on project there should only be 1 project in
    # the id -> slug transform.
    expected_select = [
        Function(
            "transform",
            [Column("project_id"), [project1.id], [project1.slug], ""],
            "project",
        )
    ]
    self.assertCountEqual(builder.select, expected_select)
def test_project_alias_column(self):
    """Without a project filter, the transform maps every requested project."""
    # TODO(snql-boolean): Update this to match the corresponding test in test_filter
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    builder = QueryBuilder(Dataset.Discover, self.params, selected_columns=["project"])

    expected_where = [
        Condition(Column("project_id"), Op.IN, [project1.id, project2.id]),
        Condition(Column("timestamp"), Op.GTE, self.start),
        Condition(Column("timestamp"), Op.LT, self.end),
    ]
    self.assertCountEqual(builder.where, expected_where)

    expected_select = [
        Function(
            "transform",
            [
                Column("project_id"),
                [project1.id, project2.id],
                [project1.slug, project2.slug],
                "",
            ],
            "project",
        )
    ]
    self.assertCountEqual(builder.select, expected_select)
def test_join_query(self) -> None:
    """Join events to groupedmessage through the "grouped" relationship."""
    events = Entity("events", "ev")
    messages = Entity("groupedmessage", "gm")
    join = Join([Relationship(events, "grouped", messages)])

    query = (
        Query("discover", join)
        .set_select(
            [
                Column("group_id", events),
                Column("status", messages),
                Function("avg", [Column("retention_days", events)], "avg"),
            ]
        )
        .set_groupby([Column("group_id", events), Column("status", messages)])
        .set_where(
            [
                Condition(Column("project_id", events), Op.EQ, self.project_id),
                Condition(Column("project_id", messages), Op.EQ, self.project_id),
                Condition(Column("timestamp", events), Op.GTE, self.base_time),
                Condition(Column("timestamp", events), Op.LT, self.next_time),
            ]
        )
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200
    # No groupedmessage rows exist for this project, so the join is empty.
    assert data["data"] == []
def test_correct_times_seen_aggregate(self):
    # With a 10m interval (finer than the stored rollup) `sum(times_seen)`
    # must resolve to a row count.
    query = _make_query(
        "statsPeriod=6h&interval=10m&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1},
        True,
    )
    # NOTE(review): the function name "count()" (with parentheses) looks
    # inconsistent with the bare "sum" below — confirm this matches what the
    # query builder actually emits.
    assert Function("count()", [Column("times_seen")], "times_seen") in query.select_params
    # With a 1d interval the pre-aggregated counts are summed instead.
    query = _make_query(
        "statsPeriod=6h&interval=1d&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1},
        True,
    )
    assert Function("sum", [Column("times_seen")], "times_seen") in query.select_params
def _to_column(query_func: SessionsQueryFunction) -> SelectableExpression:
    """
    Converts query a function into an expression that can be directly
    plugged into anywhere columns are used (like the select argument of
    a Query)
    """
    # Distribution functions all share one quantiles expression.
    percentile_functions = {
        "p50(session.duration)",
        "p75(session.duration)",
        "p90(session.duration)",
        "p95(session.duration)",
        "p99(session.duration)",
    }
    if query_func in percentile_functions:
        return Function(
            alias="percentiles",
            function="quantiles(0.5,0.75,0.9,0.95,0.99)",
            parameters=[Column("value")],
        )

    # Simple one-to-one mappings: query function -> (alias, snql function).
    # Covers the avg/max duration distributions, the session counter and
    # the unique-user set.
    simple_functions = {
        "avg(session.duration)": ("avg", "avg"),
        "max(session.duration)": ("max", "max"),
        "sum(session)": ("sum", "sum"),
        "count_unique(user)": ("count_unique", "uniq"),
    }
    if query_func in simple_functions:
        alias, function = simple_functions[query_func]
        return Function(alias=alias, function=function, parameters=[Column("value")])

    raise ValueError("Unmapped metrics column", query_func)
def test_array_join(self):
    """array_join columns appear in both the select and the group by."""
    builder = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=["array_join(measurements_key)", "count()"],
        functions_acl=["array_join"],
    )
    joined = Function(
        "arrayJoin",
        [Column("measurements.key")],
        "array_join_measurements_key",
    )
    self.assertCountEqual(builder.columns, [joined, Function("count", [], "count")])
    # make sure the array join columns are present in groupby
    self.assertCountEqual(builder.groupby, [joined])
def to_func() -> Function:
    """Materialize the captured (function, parameters, alias) into a Function.

    Parameters that are themselves plain Python functions are invoked lazily
    here, so their values are computed at resolution time rather than when
    the expression was declared.
    """
    resolved = [
        param() if isinstance(param, types.FunctionType) else param
        for param in parameters
    ]
    return Function(function, resolved, alias)
def test_array_combinator(self):
    """The Array combinator expands into sum over an arrayJoin of the column."""
    builder = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=["sumArray(measurements_value)"],
        functions_acl=["sumArray"],
    )
    expected = Function(
        "sum",
        [Function("arrayJoin", [Column("measurements.value")])],
        "sumArray_measurements_value",
    )
    self.assertCountEqual(builder.columns, [expected])
def convert_search_filter_to_condition(
    self,
    search_filter: SearchFilter,
) -> Optional[WhereType]:
    """Convert a parsed search filter into a snql where condition.

    :param search_filter: the parsed filter (key, operator, value).
    :returns: a Condition, or None for fields that take no conversion.
    :raises NotImplementedError: for fields not yet supported here.
    """
    key_conversion_map: Mapping[str, Callable[[SearchFilter, str], WhereType]] = {
        "environment": self._environment_filter_converter,
    }
    name = search_filter.key.name
    value = search_filter.value.value

    # We want to use group_id elsewhere so shouldn't be removed from the dataset
    # but if a user has a tag with the same name we want to make sure that works
    if name == "group_id":
        name = f"tags[{name}]"

    if name in NO_CONVERSION_FIELDS:
        return None
    elif name in key_conversion_map:
        return key_conversion_map[name](search_filter, name)
    elif name in self.field_allowlist:
        lhs = self.column(name)

        # Handle checks for existence
        if search_filter.operator in ("=", "!=") and search_filter.value.value == "":
            if search_filter.key.is_tag:
                return Condition(lhs, Op(search_filter.operator), value)
            else:
                # If not a tag, we can just check that the column is null.
                # BUG FIX: the original built `ifNull(lhs)`, but ClickHouse's
                # ifNull requires exactly two arguments and returns a value,
                # not a boolean. isNull(lhs) = 1 is the null check that the
                # comment and the `1` literal intend.
                return Condition(Function("isNull", [lhs]), Op(search_filter.operator), 1)

        if search_filter.value.is_wildcard():
            # Case-insensitive regex match for wildcard values.
            condition = Condition(
                Function("match", [lhs, f"'(?i){value}'"]),
                Op(search_filter.operator),
                1,
            )
        else:
            condition = Condition(lhs, Op(search_filter.operator), value)
        return condition
    else:
        raise NotImplementedError(f"{name} not implemented in snql filter parsing yet")
def build_key_errors(interval, project):
    """Return [(group_id, count), ...] for the project's top 3 error groups."""
    start, stop = interval

    # Take the 3 most frequently occurring events
    query = Query(
        dataset=Dataset.Events.value,
        match=Entity("events"),
        select=[Column("group_id"), Function("count", [])],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            # `stop` marks the last whole day, so include it entirely.
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("group_id")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    rows = raw_snql_query(query, referrer="reports.key_errors")["data"]
    # The unaliased count() comes back under the literal key "count()".
    return [(row["group_id"], row["count()"]) for row in rows]
def build_project_usage_outcomes(start__stop, project):
    """Return (accepted errors, dropped errors, accepted transactions,
    dropped transactions) quantity totals for the project over the interval.
    """
    start, stop = start__stop

    # XXX(epurkhiser): Tsdb used to use day buckets, where the end would
    # represent a whole day. Snuba queries more accurately thus we must
    # capture the entire last day
    end = stop + timedelta(days=1)

    query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("outcome"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(
                Column("outcome"),
                Op.IN,
                [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED],
            ),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("outcome"), Column("category")],
        granularity=Granularity(ONE_DAY),
    )
    data = raw_snql_query(query, referrer="reports.outcomes")["data"]

    error_categories = DataCategory.error_categories()

    def _total(outcome, errors):
        # Sum the rows for one outcome, split by error vs transaction rows.
        return sum(
            row["total"]
            for row in data
            if row["outcome"] == outcome
            and (
                row["category"] in error_categories
                if errors
                else row["category"] == DataCategory.TRANSACTION
            )
        )

    return (
        _total(Outcome.ACCEPTED, errors=True),      # accepted errors
        _total(Outcome.RATE_LIMITED, errors=True),  # dropped errors
        _total(Outcome.ACCEPTED, errors=False),     # accepted transactions
        _total(Outcome.RATE_LIMITED, errors=False), # dropped transactions
    )
def test_tags_in_groupby(self) -> None:
    # Group error events by a raw tag column and order by last_seen.
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ]
        )
        .set_groupby([Column("tags[k8s-app]")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                # Only rows that actually have the tag set.
                Condition(Column("tags[k8s-app]"), Op.NEQ, ""),
                Condition(Column("type"), Op.NEQ, "transaction"),
            ]
        )
        .set_orderby(
            [
                OrderBy(
                    Function("max", [Column("timestamp")], "last_seen"),
                    Direction.DESC,
                )
            ]
        )
        .set_limit(1000)
    )
    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def test_escape_edge_cases(self) -> None:
    # The environment value mixes a real newline with escaped quote,
    # backslash-n, and a trailing backslash; the SDK must escape all of
    # them so snuba still accepts the query.
    query = (
        Query("events", Entity("events"))
        .set_select([Function("count", [], "times_seen")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("environment"), Op.EQ, "\\' \n \\n \\"),
            ]
        )
    )
    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def parse_exp(value: Any) -> Any:
    """
    Takes a legacy expression and converts it to an equivalent SDK Expression.

    :param value: A legacy expression.
    :type value: Any
    """
    if isinstance(value, str):
        # Legacy sends raw strings in single quotes, so strip enclosing quotes only
        if not value:
            return value
        elif value.startswith("'") and value.endswith("'"):
            value = value[1:-1]
            return parse_string(value)
        # Unquoted strings are column references.
        return Column(value)
    elif not isinstance(value, list):
        # Numbers/None etc. are parsed as scalar literals.
        return parse_scalar(value)
    if is_condition(value):
        return parse_condition_to_function(value)

    # Lists are legacy function calls: [name, args, alias?].
    alias = value[2] if len(value) > 2 else None
    if alias and alias.startswith("`") and alias.endswith("`"):
        # Strip legacy backtick-quoting around the alias.
        alias = alias[1:-1]
    if value[0].endswith("()") and not value[1]:
        # e.g. ["count()", None, ...] — drop the parens from the name.
        return Function(value[0].strip("()"), [], alias)
    if not value[0].endswith(")") and not value[1]:
        # ["count", None, "count"]
        return Function(value[0], [], alias)

    # Recursively convert the argument(s); a bare (non-list) argument is
    # wrapped into a single-element list.
    children = None
    if isinstance(value[1], list):
        children = list(map(parse_exp, value[1]))
    elif value[1]:
        children = [parse_exp(value[1])]
    return Function(value[0], children, alias)
def aliased_column(self, name: str, alias: str) -> SelectType:
    """Resolve an unresolved sentry name into a snql column carrying the
    expected result alias.

    :param name: The unresolved sentry name.
    :param alias: The expected alias in the result.
    """
    # TODO: This method should use an aliased column from the SDK once
    # that is available to skip these hacks that we currently have to
    # do aliasing.
    resolved_name = self.resolve_column_name(name)
    snql_column = Column(resolved_name)

    # Identical alias and resolved column need no wrapping at all.
    if alias == resolved_name:
        return snql_column

    # Array fields are already flattened, so arrayFlatten is a no-op that
    # just carries the alias.
    if alias in ARRAY_FIELDS:
        return Function("arrayFlatten", [snql_column], alias)

    # Tags are strings, so toString is a no-op that carries the alias.
    # (tags[...] can't be aliased directly — it confuses the sdk.)
    if TAG_KEY_RE.search(resolved_name):
        return Function("toString", [snql_column], alias)

    # Known string-typed columns get the same toString trick.
    if alias in {"user.email"}:
        return Function("toString", [snql_column], alias)

    # columns that are resolved into a snuba name are not supported
    raise NotImplementedError(f"{alias} not implemented in snql column resolution yet")
def test_arrayjoin(self) -> None:
    # array join flattens exception_frames.filename so each frame filename
    # becomes its own row, which can then be grouped and filtered on.
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ]
        )
        .set_groupby([Column("exception_frames.filename")])
        .set_array_join(Column("exception_frames.filename"))
        .set_where(
            [
                Condition(Column("exception_frames.filename"), Op.LIKE, "%.java"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
        .set_orderby(
            [
                OrderBy(
                    Function("max", [Column("timestamp")], "last_seen"),
                    Direction.DESC,
                )
            ]
        )
        .set_limit(1000)
    )
    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    # Six distinct .java frame filenames in the fixture data.
    assert len(data["data"]) == 6
def test_simple_query(self) -> None:
    # A consistent, debug-enabled count query grouped by project and a
    # custom tag; also checks the flags round-trip in the response stats.
    query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
        .set_orderby([OrderBy(Function("count", [], "count"), Direction.ASC)])
        .set_limit(1000)
        .set_consistent(True)
        .set_debug(True)
    )
    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    # consistent=True must be echoed back in the query stats.
    assert data["stats"]["consistent"]
    assert data["data"] == [
        {
            "count": 1,
            "tags[custom_tag]": "custom_value",
            "project_id": self.project_id,
        }
    ]
def test_group_filter(self, mock_raw_snql_query):
    # Filtering by spanGroup should add an IN condition on the
    # array-joined spans.group column of the suspects query.
    event = self.create_event()
    mock_raw_snql_query.side_effect = [
        {"data": [self.suspect_span_group_snuba_results("django.middleware", event)]},
    ]
    with self.feature(self.FEATURES):
        response = self.client.get(
            self.url,
            data={
                "project": self.project.id,
                "spanGroup": "cd" * 8,
            },
            format="json",
        )
    assert response.status_code == 200, response.content
    self.assert_suspect_span(
        response.data,
        [self.suspect_span_results("django.middleware", event)],
    )
    assert mock_raw_snql_query.call_count == 1
    # the first call should also contain the additional condition on the span op
    assert (
        Condition(
            lhs=Function("arrayJoin", [Column("spans.group")], "array_join_spans_group"),
            op=Op.IN,
            rhs=Function("tuple", ["cd" * 8]),
        )
        in mock_raw_snql_query.call_args_list[0][0][0].where
    )
def test_basic(self) -> None:
    """A single inserted event is counted back out by raw_snql_query."""
    now = datetime.now()
    self._insert_event_for_time(now)

    one_day = timedelta(days=1)
    query = (
        Query(dataset="events", match=Entity("events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project.id),
                # A +/- one day window around the inserted event.
                Condition(Column("timestamp"), Op.GTE, now - one_day),
                Condition(Column("timestamp"), Op.LT, now + one_day),
            ]
        )
    )

    result = snuba.raw_snql_query(query)
    assert len(result["data"]) == 1
    assert result["data"][0] == {"count": 1, "project_id": self.project.id}
def _get_project_releases_count(
    organization_id: int,
    project_ids: Sequence[int],
    scope: str,
    stats_period: Optional[str] = None,
    environments: Optional[Sequence[str]] = None,
) -> int:
    """
    Fetches the total count of releases/project combinations
    """
    if stats_period is None:
        stats_period = "24h"

    # Special rule that we support sorting by the last 24h only.
    if scope.endswith("_24h"):
        stats_period = "24h"

    _, stats_start, _ = get_rollup_starts_and_buckets(stats_period)

    conditions = [
        Condition(Column("started"), Op.GTE, stats_start),
        Condition(Column("started"), Op.LT, datetime.now()),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("org_id"), Op.EQ, organization_id),
    ]
    if environments is not None:
        conditions.append(Condition(Column("environment"), Op.IN, environments))

    # Filter out releases with zero users when sorting by either `users` or
    # `crash_free_users`.
    having = (
        [Condition(Column("users"), Op.GT, 0)]
        if scope in ["users", "crash_free_users"]
        else []
    )

    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[
            Function(
                "uniqExact", [Column("release"), Column("project_id")], alias="count"
            )
        ],
        where=conditions,
        having=having,
    )
    data = snuba.raw_snql_query(
        query, referrer="snuba.sessions.get_project_releases_count"
    )["data"]
    return data[0]["count"] if data else 0
def query_p95(interval):
    # Per-transaction p95 duration over the interval.
    # NOTE(review): `transaction_names` and `project` are read from the
    # enclosing scope, not passed in — confirm at the caller.
    start, stop = interval
    query = Query(
        dataset=Dataset.Transactions.value,
        match=Entity("transactions"),
        select=[
            Column("transaction_name"),
            Function("quantile(0.95)", [Column("duration")], "p95"),
        ],
        where=[
            Condition(Column("finish_ts"), Op.GTE, start),
            # `stop` marks the last whole day, so include it entirely.
            Condition(Column("finish_ts"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("transaction_name"), Op.IN, transaction_names),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("transaction_name")],
    )
    return raw_snql_query(query, referrer="reports.key_transactions.p95")