def __post_init__(self) -> None:
    """Validate only the minimal invariants at construction time.

    A Query follows a different validation flow from normal expressions:
    it is allowed to be temporarily incomplete here (e.g. no select
    columns yet) because clauses are added later through the set_*
    builders, and the full check happens in validate(). Only the dataset
    string and the match Entity must already be valid.
    """
    # TODO: Whitelist of Datasets and possible entities
    dataset_is_valid = isinstance(self.dataset, str) and self.dataset != ""
    if not dataset_is_valid:
        raise InvalidQuery("queries must have a valid dataset")
    if not isinstance(self.match, Entity):
        raise InvalidQuery("queries must have a valid Entity")
def set_groupby(
    self, groupby: Sequence[Union[Column, CurriedFunction, Function]]
) -> "Query":
    """Return a copy of this query with the given groupby clause.

    Raises InvalidQuery unless every element is a Column or Function.
    """
    is_valid = list_type(groupby, (Column, CurriedFunction, Function))
    if not is_valid:
        raise InvalidQuery(
            "groupby clause must be a list of Column and/or Function")
    return self._replace("groupby", groupby)
def set_select(
    self, select: Sequence[Union[Column, CurriedFunction, Function]]
) -> "Query":
    """Return a copy of this query with the given select clause.

    Raises InvalidQuery unless `select` is a non-empty list whose
    elements are all Column or Function expressions.
    """
    if select and list_type(select, (Column, CurriedFunction, Function)):
        return self._replace("select", select)
    raise InvalidQuery(
        "select clause must be a non-empty list of Column and/or Function"
    )
def json_to_snql(body: Mapping[str, Any], entity: str) -> Query:
    """Translate a legacy Snuba JSON query body into a SnQL Query.

    Each recognized key of `body` is mapped onto the corresponding
    Query builder method. Keys not listed below are ignored.
    NOTE(review): the exact legacy schema is assumed from the keys read
    here — confirm against the legacy API documentation.

    :param body: legacy query payload (dataset, selected_columns,
        conditions, orderby, etc.).
    :param entity: name of the entity the query should run against.
    :raises InvalidQuery: on nested (OR) condition lists, which are not
        supported yet, and on any invalid clause rejected by the
        Query setters.
    """
    dataset = body.get("dataset", "")
    sample = body.get("sample")
    query = Query(dataset, Entity(entity, sample))

    # Plain selected columns plus legacy "aggregations", which are parsed
    # with the same expression parser and appended to the select clause.
    selected_columns = list(map(parse_exp, body.get("selected_columns", [])))
    for a in body.get("aggregations", []):
        selected_columns.append(parse_exp(a))

    # arrayjoin is expressed in SnQL as an arrayJoin() call aliased to
    # the original column name.
    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        selected_columns.append(
            Function("arrayJoin", [Column(arrayjoin)], arrayjoin))

    query = query.set_select(selected_columns)

    # A bare (non-list) groupby value is wrapped so set_groupby always
    # receives a list.
    groupby = body.get("groupby", [])
    if groupby and not isinstance(groupby, list):
        groupby = [groupby]
    query = query.set_groupby(list(map(parse_exp, groupby)))

    conditions = []
    for cond in body.get("conditions", []):
        # A simple condition is a [lhs, op, rhs] triple with a string op.
        # Anything else is a nested boolean (OR) condition, unsupported here.
        if len(cond) != 3 or not isinstance(cond[1], str):
            raise InvalidQuery("OR conditions not supported yet")
        conditions.append(
            Condition(parse_exp(cond[0]), Op(cond[1]), parse_scalar(cond[2])))

    # Top-level "project"/"organization" keys become equality or IN
    # conditions on their respective id columns. A scalar int maps to EQ;
    # a list/tuple of values maps to IN.
    extra_conditions = [("project", "project_id"), ("organization", "org_id")]
    for cond, col in extra_conditions:
        column = Column(col)
        values = body.get(cond)
        if isinstance(values, int):
            conditions.append(Condition(column, Op.EQ, values))
        elif isinstance(values, list):
            rhs: Sequence[Any] = list(map(parse_scalar, values))
            conditions.append(Condition(column, Op.IN, rhs))
        elif isinstance(values, tuple):
            rhs = tuple(map(parse_scalar, values))
            conditions.append(Condition(column, Op.IN, rhs))

    # Time range: from_date is exclusive (GT), to_date inclusive (LTE).
    date_conds = [("from_date", Op.GT), ("to_date", Op.LTE)]
    for cond, op in date_conds:
        date_str = body.get(cond, "")
        if date_str:
            # HACK: This is to get sessions working quickly.
            # The time column should depend on the entity.
            conditions.append(
                Condition(Column("started"), op, parse_datetime(date_str)))

    query = query.set_where(conditions)

    # "having" uses the same triple format (and the same OR restriction)
    # as "conditions".
    having = []
    for cond in body.get("having", []):
        if len(cond) != 3 or not isinstance(cond[1], str):
            raise InvalidQuery("OR conditions not supported yet")
        having.append(
            Condition(parse_exp(cond[0]), Op(cond[1]), parse_scalar(cond[2])))

    query = query.set_having(having)

    # orderby: a leading "-" on a string expression means descending.
    order_by = body.get("orderby")
    if order_by:
        if not isinstance(order_by, list):
            order_by = [order_by]
        order_bys = []
        for o in order_by:
            direction = Direction.ASC
            if isinstance(o, str) and o.startswith("-"):
                direction = Direction.DESC
                o = o.lstrip("-")
            order_bys.append(OrderBy(parse_exp(o), direction))
        query = query.set_orderby(order_bys)

    # Legacy limitby is a (limit, column_name) pair; SnQL's LimitBy takes
    # them in (column, count) order.
    limitby = body.get("limitby")
    if limitby:
        limit, name = limitby
        query = query.set_limitby(LimitBy(Column(name), int(limit)))

    # Remaining scalar options map 1:1 onto set_<name> builder methods.
    extras = (
        "limit",
        "offset",
        "granularity",
        "totals",
        "consistent",
        "turbo",
        "debug",
    )
    for extra in extras:
        if body.get(extra) is not None:
            query = getattr(query, f"set_{extra}")(body.get(extra))

    return query
]).set_limit(10).set_offset(1).set_granularity(3600), None, id="unary condition", ), pytest.param( Query( dataset="discover", match=Entity("events"), select=None, groupby=None, where=[Condition(Column("timestamp"), Op.GT, NOW)], limit=Limit(10), offset=Offset(1), granularity=Granularity(3600), ), InvalidQuery("query must have at least one column in select"), id="missing select", ), pytest.param( Query( dataset="discover", match=Entity("events"), select=[Function("count", [])], groupby=[Column("title")], where=[Condition(Column("timestamp"), Op.GT, NOW)], limit=Limit(10), offset=Offset(1), granularity=Granularity(3600), ), InvalidQuery( "Function(function='count', initializers=None, parameters=[], alias=None) must have an alias in the select"
def set_match(self, match: Entity) -> "Query":
    """Return a copy of this query targeting the given Entity.

    Raises InvalidQuery if `match` is not an Entity.
    """
    if isinstance(match, Entity):
        return self._replace("match", match)
    raise InvalidQuery(f"{match} must be a valid Entity")
def set_limitby(self, limitby: LimitBy) -> "Query":
    """Return a copy of this query with the given limitby clause.

    Raises InvalidQuery if `limitby` is not a LimitBy instance.
    """
    if isinstance(limitby, LimitBy):
        return self._replace("limitby", limitby)
    raise InvalidQuery("limitby clause must be a LimitBy")
def set_orderby(self, orderby: Sequence[OrderBy]) -> "Query":
    """Return a copy of this query with the given orderby clause.

    Raises InvalidQuery unless every element is an OrderBy.
    """
    if list_type(orderby, (OrderBy, )):
        return self._replace("orderby", orderby)
    raise InvalidQuery("orderby clause must be a list of OrderBy")
def set_having(self, conditions: Sequence[Condition]) -> "Query":
    """Return a copy of this query with the given having clause.

    Raises InvalidQuery unless every element is a Condition.
    """
    if list_type(conditions, (Condition, )):
        return self._replace("having", conditions)
    raise InvalidQuery("having clause must be a list of Condition")