def test_simple_orderby(self): query = QueryBuilder( Dataset.Discover, self.params, selected_columns=["user.email", "release"], orderby=["user.email"], ) self.assertCountEqual(query.where, self.default_conditions) self.assertCountEqual( query.orderby, [ OrderBy(Function("toString", [Column("email")], "user.email"), Direction.ASC) ], ) query.get_snql_query().validate() query = QueryBuilder( Dataset.Discover, self.params, selected_columns=["user.email", "release"], orderby=["-user.email"], ) self.assertCountEqual(query.where, self.default_conditions) self.assertCountEqual( query.orderby, [ OrderBy(Function("toString", [Column("email")], "user.email"), Direction.DESC) ], ) query.get_snql_query().validate()
def test_orderby(exp: Any, direction: Any, exception: Optional[Exception]) -> None: if exception is not None: with pytest.raises(type(exception), match=re.escape(str(exception))): OrderBy(exp, direction) else: assert OrderBy(exp, direction)
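# Hedged sketch of cases the parametrized validator above can consume. The
# valid case uses real snuba-sdk types; the failure case only assumes that
# the SDK rejects a non-Direction direction at construction time (the exact
# exception type and message are assumptions, so a bare Exception is caught).
# The helper name below is hypothetical, not part of the original suite.
from snuba_sdk import Column, Direction, OrderBy


def _orderby_smoke_test() -> None:
    # A well-formed OrderBy validates on construction.
    assert OrderBy(Column("timestamp"), Direction.DESC)
    # A plain string direction should fail validation.
    try:
        OrderBy(Column("timestamp"), "DESC")  # type: ignore[arg-type]
    except Exception:
        pass
    else:
        raise AssertionError("expected OrderBy to reject a plain string direction")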
def test_sort_count(self, mock_raw_snql_query):
    event = self.create_event()

    mock_raw_snql_query.side_effect = [
        {
            "data": [
                self.suspect_span_group_snuba_results("django.view", event),
                self.suspect_span_group_snuba_results("django.middleware", event),
                self.suspect_span_group_snuba_results("http.server", event),
            ],
        },
    ]

    with self.feature(self.FEATURES):
        response = self.client.get(
            self.url,
            data={
                "project": self.project.id,
                "sort": "-count",
            },
            format="json",
        )

    assert response.status_code == 200, response.content
    self.assert_suspect_span(
        response.data,
        [
            self.suspect_span_results("django.view", event),
            self.suspect_span_results("django.middleware", event),
            self.suspect_span_results("http.server", event),
        ],
    )

    assert mock_raw_snql_query.call_count == 1

    # the first call is to get the suspects, and should use the specified sort
    assert mock_raw_snql_query.call_args_list[0][0][0].orderby == [
        OrderBy(exp=Function("count", [], "count"), direction=Direction.DESC),
        OrderBy(
            exp=Function(
                "sum",
                [Function("arrayJoin", [Column("spans.exclusive_time")])],
                "sumArray_spans_exclusive_time",
            ),
            direction=Direction.DESC,
        ),
    ]
    assert (
        mock_raw_snql_query.call_args_list[0][0][1]
        == "api.organization-events-spans-performance-suspects"
    )
def test_orderby(self) -> None: self.project_id3 = next(self.id_iter) self.org_id2 = next(self.id_iter) self.generate_session_events(self.org_id2, self.project_id3) query = Query( dataset="sessions", match=Entity("org_sessions"), select=[Column("org_id"), Column("project_id")], groupby=[Column("org_id"), Column("project_id")], where=[ Condition(Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)), Condition(Column("started"), Op.LT, datetime.utcnow()), ], granularity=Granularity(3600), orderby=[OrderBy(Column("org_id"), Direction.ASC)], ) response = self.app.post( "/sessions/snql", data=query.snuba(), ) data = json.loads(response.data) assert response.status_code == 200, response.data assert len(data["data"]) == 3 assert data["data"][0]["org_id"] == self.org_id assert data["data"][0]["project_id"] == self.project_id assert data["data"][1]["org_id"] == self.org_id assert data["data"][1]["project_id"] == self.project_id2 assert data["data"][2]["org_id"] == self.org_id2 assert data["data"][2]["project_id"] == self.project_id3 query = query.set_orderby( [OrderBy(Column("org_id"), Direction.DESC)], ) response = self.app.post( "/sessions/snql", data=query.snuba(), ) data = json.loads(response.data) assert response.status_code == 200, response.data assert len(data["data"]) == 3 assert data["data"][0]["org_id"] == self.org_id2 assert data["data"][0]["project_id"] == self.project_id3 assert data["data"][1]["org_id"] == self.org_id assert data["data"][1]["project_id"] == self.project_id assert data["data"][2]["org_id"] == self.org_id assert data["data"][2]["project_id"] == self.project_id2
def test_sub_query(self) -> None: inner_query = (Query("discover", Entity("discover_events")).set_select( [Function("count", [], "count") ]).set_groupby([Column("project_id"), Column("tags[custom_tag]")]).set_where([ Condition(Column("type"), Op.NEQ, "transaction"), Condition(Column("project_id"), Op.EQ, self.project_id), Condition(Column("timestamp"), Op.GTE, self.base_time), Condition(Column("timestamp"), Op.LT, self.next_time), ])) query = (Query("discover", inner_query).set_select( [Function("avg", [Column("count")], "avg_count")]).set_orderby([ OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC) ]).set_limit(1000)) response = self.post("/discover/snql", data=query.snuba()) data = json.loads(response.data) assert response.status_code == 200, data assert data["data"] == [{"avg_count": 1.0}]
def generate_orderby_clause(self, direction: Direction, **kwargs: Any) -> List[OrderBy]: return [ OrderBy( self.generate_select_statements(entity=self.get_entity())[0], direction, ) ]
def _build_orderby( self, query_definition: QueryDefinition, entity: str ) -> Optional[List[OrderBy]]: if query_definition.orderby is None: return None (op, metric_name), direction = query_definition.orderby snuba_field = _OP_TO_FIELD[entity][op] return [OrderBy(Column(snuba_field.snuba_alias), direction)]
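# Illustrative sketch of the orderby shape _build_orderby consumes: a
# ((op, metric_name), direction) pair produced by the query-definition layer.
# The op and metric names below are made up for the example, and _OP_TO_FIELD
# is assumed to map them to a field exposing a snuba_alias.
example_orderby = (("sum", "sentry.sessions.session"), Direction.DESC)
# _OP_TO_FIELD["metrics_counters"]["sum"].snuba_alias -> e.g. "value"
# => _build_orderby(...) == [OrderBy(Column("value"), Direction.DESC)]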
def test_orderby_duplicate_columns(self): query = QueryBuilder( Dataset.Discover, self.params, selected_columns=["user.email", "user.email"], orderby=["user.email"], ) self.assertCountEqual( query.orderby, [OrderBy(Column("email"), Direction.ASC)], )
def generate_orderby_clause(self, direction: Direction, projects: Sequence[Project]) -> List[OrderBy]: if not projects: self.__raise_entity_validation_exception("generate_orderby_clause") self.get_entity(projects=projects) return [ OrderBy( self.generate_select_statements(projects=projects)[0], direction, ) ]
def get_snql_query(self) -> Query: return Query( dataset=self.dataset.value, match=Entity(self.dataset.value), select=self.select, where=self.where, having=self.having, groupby=self.groupby, orderby=[OrderBy(self.time_column, Direction.ASC)], granularity=self.granularity, limit=self.limit, )
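# For comparison, a minimal hand-built version of the query shape that
# get_snql_query() above returns, assuming recent snuba-sdk top-level
# exports; the dataset and column names are placeholders standing in for
# real QueryBuilder state, not values from the original module.
from snuba_sdk import Column, Direction, Entity, Granularity, Limit, OrderBy, Query

timeseries_query = Query(
    dataset="discover",
    match=Entity("discover"),
    select=[Column("time")],
    # bucketed series come back in time order
    orderby=[OrderBy(Column("time"), Direction.ASC)],
    granularity=Granularity(3600),
    limit=Limit(10000),
)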
def __init__(
    self,
    num_buckets: int,
    histogram_column: str,
    histogram_rows: Optional[int],
    histogram_params: HistogramParams,
    key_column: Optional[str],
    field_names: Optional[List[Union[str, Any, None]]],
    groupby: Optional[List[str]],
    *args: Any,
    **kwargs: Any,
):
    kwargs["functions_acl"] = kwargs.get("functions_acl", []) + self.base_function_acl
    super().__init__(*args, **kwargs)

    self.additional_groupby = groupby
    selected_columns = kwargs["selected_columns"]

    resolved_histogram = self.resolve_column(histogram_column)

    # Reset the columns inherited from the QueryBuilder; the histogram only
    # needs count() and the histogram column itself.
    self.aggregates: List[CurriedFunction] = []
    self.columns = [self.resolve_column("count()"), resolved_histogram]

    if key_column is not None and field_names is not None:
        key_values: List[str] = [field for field in field_names if isinstance(field, str)]
        self.where.append(Condition(self.resolve_column(key_column), Op.IN, key_values))

    # make sure to bound the bins to get the desired range of results
    min_bin = histogram_params.start_offset
    self.where.append(Condition(resolved_histogram, Op.GTE, min_bin))
    max_bin = histogram_params.start_offset + histogram_params.bucket_size * num_buckets
    self.where.append(Condition(resolved_histogram, Op.LTE, max_bin))

    if key_column is not None:
        self.columns.append(self.resolve_column(key_column))

    groups = len(selected_columns) if histogram_rows is None else histogram_rows
    self.limit = Limit(groups * num_buckets)
    self.orderby = (self.orderby if self.orderby else []) + [
        OrderBy(resolved_histogram, Direction.ASC)
    ]
def build_key_errors(interval, project):
    start, stop = interval

    # Take the 3 most frequently occurring events
    query = Query(
        dataset=Dataset.Events.value,
        match=Entity("events"),
        select=[Column("group_id"), Function("count", [])],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("group_id")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    query_result = raw_snql_query(query, referrer="reports.key_errors")
    key_errors = query_result["data"]
    return [(e["group_id"], e["count()"]) for e in key_errors]
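# Usage note, hedged: Function("count", []) above carries no alias, so the
# result rows key the aggregate by its expression string, which is why the
# comprehension reads e["count()"]. A sketch of the expected row shape:
# query_result["data"] == [{"group_id": 123, "count()": 42}, ...]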
def test_sessions_query(self) -> None: query = (Query("sessions", Entity("sessions")).set_select( [Column("project_id"), Column("release")]).set_groupby( [Column("project_id"), Column("release")]).set_where([ Condition(Column("project_id"), Op.IN, [self.project_id]), Condition(Column("org_id"), Op.EQ, self.org_id), Condition( Column("started"), Op.GTE, datetime(2021, 1, 1, 17, 5, 59, 554860), ), Condition(Column("started"), Op.LT, datetime(2022, 1, 1, 17, 6, 0, 554981)), ]).set_orderby([OrderBy(Column("sessions"), Direction.DESC)]).set_limit(100)) response = self.post("/sessions/snql", data=query.snuba()) data = json.loads(response.data) assert response.status_code == 200 assert data["data"] == []
def test_tags_in_groupby(self) -> None: query = (Query("events", Entity("events")).set_select([ Function("count", [], "times_seen"), Function("min", [Column("timestamp")], "first_seen"), Function("max", [Column("timestamp")], "last_seen"), ]).set_groupby([Column("tags[k8s-app]")]).set_where([ Condition(Column("project_id"), Op.EQ, self.project_id), Condition(Column("timestamp"), Op.GTE, self.base_time), Condition(Column("timestamp"), Op.LT, self.next_time), Condition(Column("tags[k8s-app]"), Op.NEQ, ""), Condition(Column("type"), Op.NEQ, "transaction"), ]).set_orderby([ OrderBy( Function("max", [Column("timestamp")], "last_seen"), Direction.DESC, ) ]).set_limit(1000)) response = self.post("/events/snql", data=query.snuba()) data = json.loads(response.data) assert response.status_code == 200, data
def test_arrayjoin(self) -> None: query = (Query("events", Entity("events")).set_select([ Function("count", [], "times_seen"), Function("min", [Column("timestamp")], "first_seen"), Function("max", [Column("timestamp")], "last_seen"), ]).set_groupby([Column("exception_frames.filename")]).set_array_join( Column("exception_frames.filename")).set_where([ Condition(Column("exception_frames.filename"), Op.LIKE, "%.java"), Condition(Column("project_id"), Op.EQ, self.project_id), Condition(Column("timestamp"), Op.GTE, self.base_time), Condition(Column("timestamp"), Op.LT, self.next_time), ]).set_orderby([ OrderBy( Function("max", [Column("timestamp")], "last_seen"), Direction.DESC, ) ]).set_limit(1000)) response = self.post("/events/snql", data=query.snuba()) data = json.loads(response.data) assert response.status_code == 200, data assert len(data["data"]) == 6
def test_simple_query(self) -> None: query = (Query("discover", Entity("discover_events")).set_select( [Function("count", [], "count")]).set_groupby( [Column("project_id"), Column("tags[custom_tag]")]).set_where([ Condition(Column("type"), Op.NEQ, "transaction"), Condition(Column("project_id"), Op.EQ, self.project_id), Condition(Column("timestamp"), Op.GTE, self.base_time), Condition(Column("timestamp"), Op.LT, self.next_time), ]).set_orderby([ OrderBy(Function("count", [], "count"), Direction.ASC) ]).set_limit(1000).set_consistent(True).set_debug(True)) response = self.post("/discover/snql", data=query.snuba()) data = json.loads(response.data) assert response.status_code == 200, data assert data["stats"]["consistent"] assert data["data"] == [{ "count": 1, "tags[custom_tag]": "custom_value", "project_id": self.project_id, }]
def build_project_series(start__stop, project): start, stop = start__stop rollup = ONE_DAY resolution, series = tsdb.get_optimal_rollup_series(start, stop, rollup) assert resolution == rollup, "resolution does not match requested value" clean = partial(clean_series, start, stop, rollup) def zerofill_clean(data): return clean(zerofill(data, start, stop, rollup, fill_default=0)) # Note: this section can be removed issue_ids = project.group_set.filter(status=GroupStatus.RESOLVED, resolved_at__gte=start, resolved_at__lt=stop).values_list( "id", flat=True) # TODO: The TSDB calls could be replaced with a SnQL call here tsdb_range_resolved = _query_tsdb_groups_chunked(tsdb.get_range, issue_ids, start, stop, rollup) resolved_error_series = reduce( merge_series, map(clean, tsdb_range_resolved.values()), clean([(timestamp, 0) for timestamp in series]), ) # end # Use outcomes to compute total errors and transactions outcomes_query = Query( dataset=Dataset.Outcomes.value, match=Entity("outcomes"), select=[ Column("time"), Column("category"), Function("sum", [Column("quantity")], "total"), ], where=[ Condition(Column("timestamp"), Op.GTE, start), Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)), Condition(Column("project_id"), Op.EQ, project.id), Condition(Column("org_id"), Op.EQ, project.organization_id), Condition(Column("outcome"), Op.EQ, Outcome.ACCEPTED), Condition( Column("category"), Op.IN, [*DataCategory.error_categories(), DataCategory.TRANSACTION], ), ], groupby=[Column("time"), Column("category")], granularity=Granularity(rollup), orderby=[OrderBy(Column("time"), Direction.ASC)], ) outcome_series = raw_snql_query(outcomes_query, referrer="reports.outcome_series") total_error_series = OrderedDict() for v in outcome_series["data"]: if v["category"] in DataCategory.error_categories(): timestamp = int(to_timestamp(parse_snuba_datetime(v["time"]))) total_error_series[timestamp] = total_error_series.get( timestamp, 0) + v["total"] total_error_series = zerofill_clean(list(total_error_series.items())) transaction_series = [(int(to_timestamp(parse_snuba_datetime(v["time"]))), v["total"]) for v in outcome_series["data"] if v["category"] == DataCategory.TRANSACTION] transaction_series = zerofill_clean(transaction_series) error_series = merge_series( resolved_error_series, total_error_series, lambda resolved, total: (resolved, total - resolved), # Resolved, Unresolved ) # Format of this series: [(resolved , unresolved, transactions)] return merge_series( error_series, transaction_series, lambda errors, transactions: errors + (transactions, ), )
def test_sort_percentiles(self, mock_raw_snql_query):
    event = self.create_event()

    for i, sort in enumerate(
        [
            "p50ExclusiveTime",
            "p75ExclusiveTime",
            "p95ExclusiveTime",
            "p99ExclusiveTime",
        ]
    ):
        mock_raw_snql_query.side_effect = [
            {
                "data": [
                    self.suspect_span_group_snuba_results("http.server", event),
                    self.suspect_span_group_snuba_results("django.middleware", event),
                    self.suspect_span_group_snuba_results("django.view", event),
                ],
            },
        ]

        with self.feature(self.FEATURES):
            response = self.client.get(
                self.url,
                data={
                    "project": self.project.id,
                    "sort": f"-{sort}",
                },
                format="json",
            )

        assert response.status_code == 200, response.content
        self.assert_suspect_span(
            response.data,
            [
                self.suspect_span_results("http.server", event),
                self.suspect_span_results("django.middleware", event),
                self.suspect_span_results("django.view", event),
            ],
        )

        percentile = sort[1:3]

        assert mock_raw_snql_query.call_count == i + 1
        # the first call is to get the suspects, and should use the specified sort
        assert mock_raw_snql_query.call_args_list[i][0][0].orderby == [
            OrderBy(
                exp=Function(
                    f"quantile(0.{percentile.rstrip('0')})",
                    [Function("arrayJoin", [Column("spans.exclusive_time")])],
                    f"percentileArray_spans_exclusive_time_0_{percentile}",
                ),
                direction=Direction.DESC,
            )
        ]
        assert (
            mock_raw_snql_query.call_args_list[i][0][1]
            == "api.organization-events-spans-performance-suspects"
        )
def __get_release_from_filters(
    org_id,
    project_id,
    release,
    scope,
    scope_value,
    stats_start,
    scope_operation,
    scope_direction,
    release_operation,
    release_direction,
    limit,
    crash_free_function=None,
    environments=None,
):
    """
    Helper function that, based on the passed args, constructs a snuba query and
    runs it to fetch a release
    Inputs:
        * org_id: Organisation Id
        * project_id
        * release: release version
        * scope: Sort order criteria -> sessions, users, crash_free_sessions, crash_free_users
        * scope_value: The value/count of the scope argument
        * stats_start: Start datetime of the stats period
        * scope_operation: Indicates which operation should be used to compare the
            releases' scope value with the current release's scope value. Either Op.GT or Op.LT
        * scope_direction: Indicates which ordering should be used to order the
            releases' scope values. Either ASC or DESC
        * release_operation: Indicates which operation should be used to compare the
            releases' versions with the current release's version. Either Op.GT or Op.LT
        * release_direction: Indicates which ordering should be used to order the
            releases' versions. Either ASC or DESC
        * crash_free_function: Optional arg that is passed when a function needs to be
            applied in the query, as in the case of crash_free_sessions and crash_free_users
        * environments
    Return:
        A list of releases that contains either one release or none at all
    """
    release_conditions = [
        Condition(Column("started"), Op.GTE, stats_start),
        Condition(
            Column("started"),
            Op.LT,
            datetime.utcnow(),
        ),
        Condition(Column("project_id"), Op.EQ, project_id),
        Condition(Column("org_id"), Op.EQ, org_id),
    ]
    if environments is not None:
        release_conditions.append(Condition(Column("environment"), Op.IN, environments))

    # Build the select list, appending a function when a crash-free option
    # (crash_free_users or crash_free_sessions) is picked
    select = [
        Column("project_id"),
        Column("release"),
    ]
    if crash_free_function is not None:
        select.append(crash_free_function)

    having = [
        Or(
            conditions=[
                Condition(Column(scope), scope_operation, scope_value),
                And(
                    conditions=[
                        Condition(Column(scope), Op.EQ, scope_value),
                        Condition(Column("release"), release_operation, release),
                    ]
                ),
            ]
        )
    ]
    orderby = [
        OrderBy(direction=scope_direction, exp=Column(scope)),
        OrderBy(direction=release_direction, exp=Column("release")),
    ]

    query = (
        Query(
            dataset=Dataset.Sessions.value,
            match=Entity("sessions"),
            select=select,
            where=release_conditions,
            having=having,
            groupby=[Column("release"), Column("project_id")],
            orderby=orderby,
        )
        .set_limit(limit)
        .set_offset(0)
    )
    return raw_snql_query(query, referrer="sessions.get_prev_or_next_release")["data"]
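# Hedged sketch of how a caller might pair the operation/direction arguments
# above for "next" vs "previous" release paging. The pairings are inferred
# from the HAVING/ORDER BY construction in __get_release_from_filters and are
# not confirmed call sites; the dict names are illustrative.
NEXT_RELEASE_ARGS = {
    "scope_operation": Op.GT,
    "scope_direction": Direction.ASC,
    "release_operation": Op.GT,
    "release_direction": Direction.ASC,
}
PREV_RELEASE_ARGS = {
    "scope_operation": Op.LT,
    "scope_direction": Direction.DESC,
    "release_operation": Op.LT,
    "release_direction": Direction.DESC,
}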
def json_to_snql(body: Mapping[str, Any], entity: str) -> Query:
    """
    This will output a Query object that matches the legacy query body that
    was passed in. The entity is necessary since the SnQL API requires an
    explicit entity. This doesn't support subqueries or joins.

    :param body: The legacy API body.
    :type body: Mapping[str, Any]
    :param entity: The name of the entity being queried.
    :type entity: str

    :raises InvalidExpressionError, InvalidQueryError: If the legacy body is
        invalid, the SDK will raise an exception.

    """
    dataset = body.get("dataset") or entity
    sample = body.get("sample")
    if sample is not None:
        sample = float(sample)
    query = Query(dataset, Entity(entity, None, sample))

    selected_columns = []
    for a in body.get("aggregations", []):
        selected_columns.append(parse_exp(list(a)))

    selected = []
    for s in body.get("selected_columns", []):
        if isinstance(s, tuple):
            selected.append(list(s))
        else:
            selected.append(s)

    selected_columns.extend(list(map(parse_exp, selected)))

    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        query = query.set_array_join([Column(arrayjoin)])

    query = query.set_select(selected_columns)

    groupby = body.get("groupby", [])
    if groupby and not isinstance(groupby, list):
        groupby = [groupby]

    parsed_groupby = []
    for g in groupby:
        if isinstance(g, tuple):
            g = list(g)
        parsed_groupby.append(parse_exp(g))
    query = query.set_groupby(parsed_groupby)

    conditions: list[Union[Or, Condition]] = []
    if body.get("organization"):
        org_cond = parse_extension_condition("org_id", body["organization"])
        if org_cond:
            conditions.append(org_cond)

    assert isinstance(query.match, Entity)
    time_column = get_required_time_column(query.match.name)
    if time_column:
        time_cols = (("from_date", Op.GTE), ("to_date", Op.LT))
        for col, op in time_cols:
            date_val = body.get(col)
            if date_val:
                conditions.append(
                    Condition(Column(time_column), op, parse_datetime(date_val))
                )

    if body.get("project"):
        proj_cond = parse_extension_condition("project_id", body["project"], True)
        if proj_cond:
            conditions.append(proj_cond)

    for cond in body.get("conditions", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            if len(or_conditions) > 1:
                conditions.append(Or(or_conditions))
            else:
                conditions.extend(or_conditions)
        else:
            conditions.append(parse_condition(cond))

    query = query.set_where(conditions)

    having: list[Union[Or, Condition]] = []
    for cond in body.get("having", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            having.append(Or(or_conditions))
        else:
            having.append(parse_condition(cond))

    query = query.set_having(having)

    order_by = body.get("orderby")
    if order_by:
        if not isinstance(order_by, list):
            order_by = [order_by]

        order_bys = []
        for o in order_by:
            direction = Direction.ASC
            if isinstance(o, list):
                first = o[0]
                if isinstance(first, str) and first.startswith("-"):
                    o[0] = first.lstrip("-")
                    direction = Direction.DESC

                part = parse_exp(o)
            elif isinstance(o, str):
                if o.startswith("-"):
                    direction = Direction.DESC
                    part = parse_exp(o.lstrip("-"))
                else:
                    part = parse_exp(o)

            order_bys.append(OrderBy(part, direction))

        query = query.set_orderby(order_bys)

    limitby = body.get("limitby")
    if limitby:
        limit, name = limitby
        query = query.set_limitby(LimitBy([Column(name)], int(limit)))

    extras = (
        "limit",
        "offset",
        "granularity",
        "totals",
        "consistent",
        "turbo",
        "debug",
        "dry_run",
        "parent_api",
    )
    for extra in extras:
        if body.get(extra) is not None:
            query = getattr(query, f"set_{extra}")(body.get(extra))

    # Query is immutable, so the builder result must be reassigned
    query = query.set_legacy(True)
    return query
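# Hedged usage sketch for json_to_snql: a minimal legacy body exercising the
# "-column" descending orderby syntax. The field names and project id are
# illustrative, and .snuba() is assumed to be the serialization entry point
# in the SDK version this module targets (as in the tests above).
legacy_body = {
    "selected_columns": ["event_id", "timestamp"],
    "project": 1,
    "orderby": "-timestamp",
    "limit": 10,
}
snql = json_to_snql(legacy_body, "events")
# snql.snuba() now renders an ORDER BY timestamp DESC clause.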
def _render_trees(group: Group, user): materialized_hashes = list({ gh.hash for gh in GroupHash.objects.filter(project=group.project, group=group) }) # Evaluates to the index of the last hash that is in materialized_hashes, # or 1 otherwise. find_hash_expr = _construct_arraymax([1] + [ # type: ignore Function("indexOf", [Column("hierarchical_hashes"), hash]) for hash in materialized_hashes ]) # After much deliberation I (markus) decided that it would be best to # render the entire tree using one large Snuba query. A previous # implementation incurred n+1 queries on Snuba (n = number of materialized # hashes) and was very buggy when it came to missing materialized hashes # (which can happen if fallback/secondary grouping is turned on), events # were counted twice because those n+1 queries accidentally counted # overlapping sets of events, and the endpoint response time was kind of # bad because of n+1 query. # # It being one large query may also make it easier to add pagination down # the road. query = ( Query("events", Entity("events")).set_select([ Function("count", [], "event_count"), Function( "argMax", [Column("event_id"), Column("timestamp")], "event_id"), Function("max", [Column("timestamp")], "latest_event_timestamp"), # If hierarchical_hashes contains any of the materialized # hashes, find_hash_expr evaluates to the last found index and # arraySlice will give us this hash + the next child hash that # we use in groupby # # If hierarchical_hashes does not contain any of those hashes, # find_hash_expr will return 1 so we start slicing at the beginning. # This can happen when hierarchical_hashes is empty (=> # hash_slice = []), but we also try to recover gracefully from # a hypothetical case where we are missing some hashes in # postgres (unclear how this could be reached). # # We select some intermediate computation values here which we # definitely don't need the results of. It's just temp vars. Function( # First we find the materialized hash using find_hash_expr, # and subtract 1 which should be the parent hash if there # is one. If there isn't, this now can be an out-of-bounds # access by being 0 (arrays are indexed starting with 1) "minus", [find_hash_expr, 1], "parent_hash_i", ), # We clip the value to be at least 1, this will be where we # start slicing hierarchical_hashes. 0 would be an out of # bounds access. Function("greatest", [Column("parent_hash_i"), 1], "slice_start"), # This will return a slice of length 2 if the materialized hash # has been found at the beginning of the array, but return a # slice of length 3 if not. 
Function( "arraySlice", [ Column("hierarchical_hashes"), Column("slice_start"), Function( "minus", [ Function( "plus", [Column("parent_hash_i"), 3], ), Column("slice_start"), ], ), ], "hash_slice", ), Column("primary_hash"), ]).set_where(_get_group_filters(group)).set_groupby([ Column("parent_hash_i"), Column("slice_start"), Column("hash_slice"), Column("primary_hash"), ]).set_orderby( [OrderBy(Column("latest_event_timestamp"), Direction.DESC)])) rv = [] for row in snuba.raw_snql_query( query, referrer="api.group_split.render_grouping_tree")["data"]: if len(row["hash_slice"]) == 0: hash = row["primary_hash"] parent_hash = child_hash = None elif len(row["hash_slice"]) == 1: (hash, ) = row["hash_slice"] parent_hash = child_hash = None elif len(row["hash_slice"]) == 2: hash, child_hash = row["hash_slice"] parent_hash = None elif len(row["hash_slice"]) == 3: parent_hash, hash, child_hash = row["hash_slice"] else: raise ValueError("unexpected length of hash_slice") _add_hash( rv, group, user, parent_hash, hash, child_hash, row["event_count"], row["latest_event_timestamp"], row["event_id"], ) rv.sort(key=lambda tree: (tree["id"] or "", tree["childId"] or "")) return rv
def _render_trees(group: Group, user): materialized_hashes = { gh.hash for gh in GroupHash.objects.filter(project=group.project, group=group) } rv = [] common_where = _get_group_filters(group) for materialized_hash in materialized_hashes: # For every materialized hash we want to render parent and child # hashes, a limited view of the entire tree. We fetch one sample event # so we know how we need to slice hierarchical_hashes. hierarchical_hashes = _get_full_hierarchical_hashes( group, materialized_hash) if not hierarchical_hashes: # No hierarchical_hashes found, the materialized hash is probably # from flat grouping. parent_pos = None hash_pos = None child_pos = None slice_start = 0 else: materialized_pos = hierarchical_hashes.index(materialized_hash) if materialized_pos == 0: parent_pos = None hash_pos = 0 child_pos = 1 slice_start = 1 else: parent_pos = 0 hash_pos = 1 child_pos = 2 slice_start = materialized_pos # Select sub-views of the trees that contain materialized_hash. query = (Query("events", Entity("events")).set_select([ Function("count", [], "event_count"), Function( "argMax", [Column("event_id"), Column("timestamp")], "event_id"), Function("max", [Column("timestamp")], "latest_event_timestamp"), Function("arraySlice", [Column("hierarchical_hashes"), slice_start, 3], "hashes"), ]).set_where(common_where + [ Condition( Function( "has", [ Column("hierarchical_hashes"), materialized_hash, ], ), Op.EQ, 1, ), ]).set_groupby([Column("hashes")]).set_orderby( [OrderBy(Column("latest_event_timestamp"), Direction.DESC)])) for row in snuba.raw_snql_query(query)["data"]: assert not row["hashes"] or row["hashes"][ hash_pos] == materialized_hash event_id = row["event_id"] event = eventstore.get_event_by_id(group.project_id, event_id) tree = { "parentId": _get_checked(row["hashes"], parent_pos), "id": materialized_hash, "childId": _get_checked(row["hashes"], child_pos), "eventCount": row["event_count"], "latestEvent": serialize(event, user, EventSerializer()), } rv.append(tree) if not row["hashes"]: continue try: for variant in event.get_grouping_variants().values(): if not isinstance(variant, ComponentVariant): continue if variant.get_hash() == tree["parentId"]: tree["parentLabel"] = variant.component.tree_label if variant.get_hash() == tree["childId"]: tree["childLabel"] = variant.component.tree_label if variant.get_hash() == tree["id"]: tree["label"] = variant.component.tree_label except Exception: sentry_sdk.capture_exception() rv.sort(key=lambda tree: (tree["parentId"] or "", tree["id"] or "", tree["childId"] or "")) return rv
def _query_snuba(group: Group, id: int, offset=None, limit=None): query = (Query("events", Entity("events")).set_select([ Function( "arrayElement", [ Column("hierarchical_hashes"), Function("least", [ id + 1, Function("length", [Column("hierarchical_hashes")]) ]), ], "new_materialized_hash", ), Function("argMax", [Column("event_id"), Column("timestamp")], "latest_event_id"), Function("max", [Column("timestamp")], "latest_event_timestamp"), Function("count", [], "event_count"), ]).set_groupby([Column("new_materialized_hash")]).set_orderby( [OrderBy(Column("latest_event_timestamp"), Direction.DESC)])) levels_overview = get_levels_overview(group) # These conditions are always valid common_where = [ Condition(Column("primary_hash"), Op.EQ, levels_overview.only_primary_hash), Condition(Column("project_id"), Op.EQ, group.project_id), ] if id >= levels_overview.current_level: # Good path: Since we increase the level we can easily constrain the # entire query by group_id and timerange query = query.set_where(common_where + _get_group_filters(group)) else: # Bad path: We decreased the level and now we need to count events from # other groups. If we cannot filter by group_id, we can also not # restrict the timerange to anything at all. The Snuba API still # requires us to set a timerange, so we set it to the maximum of 90d. # # Luckily the minmax index on group_id alone is reasonably efficient so # that filtering by timerange (=primary key) is only a little bit # faster. now = datetime.datetime.now() new_materialized_hash = _get_hash_for_parent_level( group, id, levels_overview) query = query.set_where(common_where + [ Condition( Function("arrayElement", [Column("hierarchical_hashes"), id + 1]), Op.EQ, new_materialized_hash, ), Condition(Column("timestamp"), Op.GTE, now - datetime.timedelta(days=90)), Condition(Column("timestamp"), Op.LT, now + datetime.timedelta(seconds=10)), ]) if offset is not None: query = query.set_offset(offset) if limit is not None: query = query.set_limit(limit) return snuba.raw_snql_query( query, referrer="api.group_hashes_levels.get_level_new_issues")["data"]
Condition(Function("uniq", [Column("event_id")]), Op.GT, 1), BooleanCondition( BooleanOp.OR, [ Condition(Function("uniq", [Column("event_id")]), Op.GTE, 10), Condition( CurriedFunction("quantile", [0.5], [Column("duration")]), Op.GTE, 99, ), ], ), ], orderby=[OrderBy(Column("title"), Direction.ASC)], limitby=LimitBy([Column("title")], 5), limit=Limit(10), offset=Offset(1), granularity=Granularity(3600), totals=Totals(True), ), ( "MATCH (events SAMPLE 1000.0)", "SELECT title, uniq(event_id) AS uniq_events, quantile(0.5)(duration) AS p50", "BY title", ("WHERE timestamp > toDateTime('2021-01-02T03:04:05.000006') " "AND toHour(timestamp) <= toDateTime('2021-01-02T03:04:05.000006') " "AND project_id IN tuple(1, 2, 3) " "AND (event_id = 'abc' OR duration > 10)"), "HAVING uniq(event_id) > 1 AND (uniq(event_id) >= 10 OR quantile(0.5)(duration) >= 99)",
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    span: Span,
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    # if there aren't any suspects, return early to avoid issuing an empty query
    if per_suspect == 0:
        return {}

    selected_columns: List[str] = [
        "id",
        "project.id",
    ]

    builder = SpanQueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        query=query,
        orderby=[],
        limit=per_suspect,
        offset=offset,
    )

    # Make sure to resolve the custom span functions and add them to the columns and order bys
    orderby_columns = [
        builder.resolve_span_function(function, span, f"{function}_span_time")
        for function in SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_functions
    ]
    builder.columns += orderby_columns
    builder.orderby += [
        OrderBy(column, Direction.DESC if direction == "-" else Direction.ASC)
        for column in orderby_columns
    ]

    # we are only interested in the specific op, group pairs from the suspects
    builder.add_conditions(
        [
            Condition(Function("has", [builder.column("spans_op"), span.op]), Op.EQ, 1),
            Condition(Function("has", [builder.column("spans_group"), span.group]), Op.EQ, 1),
            Condition(
                builder.resolve_span_function("count", span, "count_span_time"),
                Op.GT,
                0,
            ),
        ]
    )

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-examples")

    examples: Dict[Span, List[EventID]] = {Span(span.op, span.group): []}

    for example in results["data"]:
        value = EventID(params["project_id"][0], example["id"])
        examples[span].append(value)

    return examples
def build_key_transactions(interval, project):
    start, stop = interval

    # Take the 3 most frequently occurring transactions
    query = Query(
        dataset=Dataset.Transactions.value,
        match=Entity("transactions"),
        select=[
            Column("transaction_name"),
            Function("count", []),
        ],
        where=[
            Condition(Column("finish_ts"), Op.GTE, start),
            Condition(Column("finish_ts"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("transaction_name")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    query_result = raw_snql_query(query, referrer="reports.key_transactions")
    key_transactions = query_result["data"]
    # materialize as a list so both query_p95 calls see the full set of names
    transaction_names = [p["transaction_name"] for p in key_transactions]

    def query_p95(interval):
        start, stop = interval
        query = Query(
            dataset=Dataset.Transactions.value,
            match=Entity("transactions"),
            select=[
                Column("transaction_name"),
                Function("quantile(0.95)", [Column("duration")], "p95"),
            ],
            where=[
                Condition(Column("finish_ts"), Op.GTE, start),
                Condition(Column("finish_ts"), Op.LT, stop + timedelta(days=1)),
                Condition(Column("transaction_name"), Op.IN, transaction_names),
                Condition(Column("project_id"), Op.EQ, project.id),
            ],
            groupby=[Column("transaction_name")],
        )
        return raw_snql_query(query, referrer="reports.key_transactions.p95")

    query_result = query_p95((start, stop))
    this_week_p95 = {}
    for point in query_result["data"]:
        this_week_p95[point["transaction_name"]] = point["p95"]

    query_result = query_p95((start - timedelta(days=7), stop - timedelta(days=7)))
    last_week_p95 = {}
    for point in query_result["data"]:
        last_week_p95[point["transaction_name"]] = point["p95"]

    return [
        (
            e["transaction_name"],
            e["count()"],
            project.id,
            this_week_p95.get(e["transaction_name"], None),
            last_week_p95.get(e["transaction_name"], None),
        )
        for e in key_transactions
    ]
pytest.param( Query( "discover", Join([Relationship(Entity("events", "e"), "has", Entity("sessions", "s"))]), ) .set_select( [ Column("group_id", Entity("events", "e")), Column("span_id", Entity("sessions", "s")), ] ) .set_where( [Condition(Column("timestamp", Entity("events", "e")), Op.IS_NOT_NULL)] ) .set_orderby( [OrderBy(Column("timestamp", Entity("events", "e")), Direction.DESC)] ) .set_limit(10) .set_offset(1) .set_granularity(3600) .set_consistent(True), ( "MATCH (e: events) -[has]-> (s: sessions)", "SELECT e.group_id, s.span_id", "WHERE e.timestamp IS NOT NULL", "ORDER BY e.timestamp DESC", "LIMIT 10", "OFFSET 1", "GRANULARITY 3600", ), [("consistent", True)],