def test_limitby(column: Any, count: Any, exception: Optional[Exception]) -> None:
    """Check LimitBy construction: valid inputs keep their count, invalid ones raise.

    When ``exception`` is given, constructing ``LimitBy(column, count)`` must
    raise that exception type with a matching message.
    """
    if exception is None:
        assert LimitBy(column, count).count == count
        return
    with pytest.raises(type(exception), match=re.escape(str(exception))):
        LimitBy(column, count)
def test_simple_limitby(self):
    """A (column, count) limitby tuple should resolve to a LimitBy clause."""
    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=self.params,
        query="",
        selected_columns=["message"],
        orderby="message",
        limitby=("message", 1),
        limit=4,
    )
    expected = LimitBy(Column("message"), 1)
    assert builder.limitby == expected
def resolve_limitby(self, limitby: Optional[Tuple[str, int]]) -> Optional[LimitBy]:
    """Turn a ``(column_name, count)`` pair into a resolved LimitBy clause.

    Returns None when no limitby was requested; raises InvalidSearchQuery
    when the name does not resolve to a plain column.
    """
    if limitby is None:
        return None

    column, count = limitby
    resolved = self.resolve_column(column)

    # Limit By can only operate on a `Column`. This has the implication
    # that non aggregate transforms are not allowed in the order by clause.
    if not isinstance(resolved, Column):
        raise InvalidSearchQuery(f"{column} used in a limit by but is not a column.")

    return LimitBy(resolved, count)
def json_to_snql(body: Mapping[str, Any], entity: str) -> Query:
    """
    This will output a Query object that matches the Legacy query body that
    was passed in. The entity is necessary since the SnQL API requires an
    explicit entity. This doesn't support subquery or joins.

    :param body: The legacy API body.
    :type body: Mapping[str, Any]
    :param entity: The name of the entity being queried.
    :type entity: str

    :raises InvalidExpressionError, InvalidQueryError: If the legacy body is
        invalid, the SDK will raise an exception.
    """
    dataset = body.get("dataset") or entity
    sample = body.get("sample")
    if sample is not None:
        sample = float(sample)
    query = Query(dataset, Entity(entity, None, sample))

    # Aggregations come first so they appear before plain selected columns.
    selected_columns = []
    for a in body.get("aggregations", []):
        selected_columns.append(parse_exp(list(a)))

    # Legacy bodies may encode expressions as tuples; normalize to lists
    # before parsing.
    selected = []
    for s in body.get("selected_columns", []):
        if isinstance(s, tuple):
            selected.append(list(s))
        else:
            selected.append(s)

    selected_columns.extend(list(map(parse_exp, selected)))

    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        query = query.set_array_join([Column(arrayjoin)])

    query = query.set_select(selected_columns)

    # groupby may be a single value or a list in the legacy format.
    groupby = body.get("groupby", [])
    if groupby and not isinstance(groupby, list):
        groupby = [groupby]

    parsed_groupby = []
    for g in groupby:
        if isinstance(g, tuple):
            g = list(g)
        parsed_groupby.append(parse_exp(g))
    query = query.set_groupby(parsed_groupby)

    conditions: list[Union[Or, Condition]] = []
    if body.get("organization"):
        org_cond = parse_extension_condition("org_id", body["organization"])
        if org_cond:
            conditions.append(org_cond)

    assert isinstance(query.match, Entity)
    time_column = get_required_time_column(query.match.name)
    if time_column:
        # from_date is inclusive, to_date is exclusive.
        time_cols = (("from_date", Op.GTE), ("to_date", Op.LT))
        for col, op in time_cols:
            date_val = body.get(col)
            if date_val:
                conditions.append(
                    Condition(Column(time_column), op, parse_datetime(date_val))
                )

    if body.get("project"):
        proj_cond = parse_extension_condition("project_id", body["project"], True)
        if proj_cond:
            conditions.append(proj_cond)

    # A legacy "condition" that is not itself a condition is a nested list
    # of OR'ed conditions.
    for cond in body.get("conditions", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            if len(or_conditions) > 1:
                conditions.append(Or(or_conditions))
            else:
                conditions.extend(or_conditions)
        else:
            conditions.append(parse_condition(cond))

    query = query.set_where(conditions)

    having: list[Union[Or, Condition]] = []
    for cond in body.get("having", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            having.append(Or(or_conditions))
        else:
            having.append(parse_condition(cond))

    query = query.set_having(having)

    order_by = body.get("orderby")
    if order_by:
        if not isinstance(order_by, list):
            order_by = [order_by]

        order_bys = []
        for o in order_by:
            direction = Direction.ASC
            if isinstance(o, list):
                # A leading "-" on the first element marks a descending sort.
                first = o[0]
                if isinstance(first, str) and first.startswith("-"):
                    o[0] = first.lstrip("-")
                    direction = Direction.DESC
                part = parse_exp(o)
            elif isinstance(o, str):
                if o.startswith("-"):
                    direction = Direction.DESC
                    part = parse_exp(o.lstrip("-"))
                else:
                    part = parse_exp(o)

            order_bys.append(OrderBy(part, direction))

        query = query.set_orderby(order_bys)

    limitby = body.get("limitby")
    if limitby:
        limit, name = limitby
        query = query.set_limitby(LimitBy([Column(name)], int(limit)))

    extras = (
        "limit",
        "offset",
        "granularity",
        "totals",
        "consistent",
        "turbo",
        "debug",
        "dry_run",
        "parent_api",
    )
    for extra in extras:
        if body.get(extra) is not None:
            query = getattr(query, f"set_{extra}")(body.get(extra))

    # Bug fix: Query setters return a new Query (the object is immutable,
    # as every other `query = query.set_...` call above relies on), so the
    # result of set_legacy must be captured or the legacy flag is silently
    # dropped.
    query = query.set_legacy(True)
    return query
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    spans: List[Span],
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    """Fetch up to ``per_suspect`` example transaction events for each suspect span.

    Builds a single Discover query covering all suspects at once and buckets
    the resulting rows back onto their (op, group) span keys.

    :param params: Snuba query params (dataset/time-range context). Exact shape
        is ParamsType — defined elsewhere in the project.
    :param query: Optional user search query string to further filter events.
    :param direction: Sort direction prefix applied to each orderby column
        (presumably "" or "-" — verify against callers).
    :param orderby: Key into SPAN_PERFORMANCE_COLUMNS selecting the sort columns.
    :param spans: The suspect (op, group) pairs to fetch examples for.
    :param per_suspect: Maximum number of example events per suspect span.
    :param offset: Optional pagination offset passed through to the query.
    :returns: Mapping of each suspect Span to its list of example EventIDs;
        suspects with no matching rows map to an empty list.
    """
    # there aren't any suspects, early return to save an empty query
    if not spans or per_suspect == 0:
        return {}

    orderby_columns = SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_sort

    selected_columns: List[str] = [
        "id",
        "project.id",
        "project",
        "array_join(spans_op)",
        "array_join(spans_group)",
        *orderby_columns,
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        query=query,
        orderby=[direction + column for column in orderby_columns],
        # we want only `per_suspect` examples for each suspect
        limit=len(spans) * per_suspect,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    # we are only interested in the specific op, group pairs from the suspects:
    # tuple(op, group) IN tuple((op1, group1), (op2, group2), ...)
    builder.add_conditions([
        Condition(
            Function(
                "tuple",
                [
                    builder.resolve_function("array_join(spans_op)"),
                    builder.resolve_function("array_join(spans_group)"),
                ],
            ),
            Op.IN,
            Function(
                "tuple",
                [Function("tuple", [suspect.op, suspect.group]) for suspect in spans],
            ),
        ),
    ])

    if len(spans) > 1:
        # Hack: the limit by clause only allows columns but here we want to
        # do a limitby on the two array joins. For the time being, directly
        # do the limitby on the internal snuba name for the span group column
        # but this should not be relied upon in production, and if two spans
        # differ only by the span op, this will result in a incorrect query
        builder.limitby = LimitBy(Column("_snuba_array_join_spans_group"), per_suspect)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(
        snql_query, "api.organization-events-spans-performance-examples"
    )

    # Pre-seed every suspect so spans with no example rows still appear
    # in the result with an empty list.
    examples: Dict[Span, List[EventID]] = {
        Span(suspect.op, suspect.group): [] for suspect in spans
    }

    for example in results["data"]:
        key = Span(example["array_join_spans_op"], example["array_join_spans_group"])
        value = EventID(example["project.id"], example["project"], example["id"])
        examples[key].append(value)

    return examples
BooleanCondition( BooleanOp.OR, [ Condition(Function("uniq", [Column("event_id")]), Op.GTE, 10), Condition( CurriedFunction("quantile", [0.5], [Column("duration")]), Op.GTE, 99, ), ], ), ], orderby=[OrderBy(Column("title"), Direction.ASC)], limitby=LimitBy([Column("title")], 5), limit=Limit(10), offset=Offset(1), granularity=Granularity(3600), totals=Totals(True), ), ( "MATCH (events SAMPLE 1000.0)", "SELECT title, uniq(event_id) AS uniq_events, quantile(0.5)(duration) AS p50", "BY title", ("WHERE timestamp > toDateTime('2021-01-02T03:04:05.000006') " "AND toHour(timestamp) <= toDateTime('2021-01-02T03:04:05.000006') " "AND project_id IN tuple(1, 2, 3) " "AND (event_id = 'abc' OR duration > 10)"), "HAVING uniq(event_id) > 1 AND (uniq(event_id) >= 10 OR quantile(0.5)(duration) >= 99)", "ORDER BY title ASC",