def test_environment_filter(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "environment:prod",
        ["environment"],
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("environment"), Op.EQ, "prod"),
            *self.default_conditions,
        ],
    )
    query.get_snql_query().validate()

    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "environment:[dev, prod]",
        ["environment"],
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("environment"), Op.IN, ["dev", "prod"]),
            *self.default_conditions,
        ],
    )
    query.get_snql_query().validate()
def test_count_if(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "count_if(event.type,equals,transaction)",
            'count_if(event.type,notEquals,"transaction")',
        ],
    )
    self.assertCountEqual(query.where, self.default_conditions)
    self.assertCountEqual(
        query.aggregates,
        [
            Function(
                "countIf",
                [
                    Function("equals", [Column("type"), "transaction"]),
                ],
                "count_if_event_type_equals_transaction",
            ),
            Function(
                "countIf",
                [
                    Function("notEquals", [Column("type"), "transaction"]),
                ],
                "count_if_event_type_notEquals__transaction",
            ),
        ],
    )
def test_count_if_with_tags(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "",
        selected_columns=[
            "count_if(foo,equals,bar)",
            'count_if(foo,notEquals,"baz")',
        ],
    )
    self.assertCountEqual(query.where, self.default_conditions)
    self.assertCountEqual(
        query.aggregates,
        [
            Function(
                "countIf",
                [
                    Function("equals", [Column("tags[foo]"), "bar"]),
                ],
                "count_if_foo_equals_bar",
            ),
            Function(
                "countIf",
                [
                    Function("notEquals", [Column("tags[foo]"), "baz"]),
                ],
                "count_if_foo_notEquals__baz",
            ),
        ],
    )
def test_project_alias_column(self):
    # TODO(snql-boolean): Update this to match the corresponding test in test_filter
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["project"])
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("project_id"), Op.IN, [project1.id, project2.id]),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ],
    )
    self.assertCountEqual(
        query.select,
        [
            Function(
                "transform",
                [
                    Column("project_id"),
                    [project1.id, project2.id],
                    [project1.slug, project2.slug],
                    "",
                ],
                "project",
            )
        ],
    )
def setUp(self):
    self.start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
    self.end = datetime.datetime(2015, 5, 19, 10, 15, 1, tzinfo=timezone.utc)
    self.projects = [1, 2, 3]
    self.params = {
        "project_id": self.projects,
        "start": self.start,
        "end": self.end,
    }
    # These conditions should always be on a query when self.params is passed
    self.default_conditions = [
        Condition(Column("timestamp"), Op.GTE, self.start),
        Condition(Column("timestamp"), Op.LT, self.end),
        Condition(Column("project_id"), Op.IN, self.projects),
    ]
def test_environment_param(self):
    self.params["environment"] = ["", "prod"]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
    self.assertCountEqual(
        query.where,
        [
            *self.default_conditions,
            Or(
                [
                    Condition(Column("environment"), Op.IS_NULL),
                    Condition(Column("environment"), Op.EQ, "prod"),
                ]
            ),
        ],
    )
    query.get_snql_query().validate()

    self.params["environment"] = ["dev", "prod"]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
    self.assertCountEqual(
        query.where,
        [
            *self.default_conditions,
            Condition(Column("environment"), Op.IN, ["dev", "prod"]),
        ],
    )
    query.get_snql_query().validate()
def test_join_query(self) -> None:
    ev = Entity("events", "ev")
    gm = Entity("groupedmessage", "gm")
    join = Join([Relationship(ev, "grouped", gm)])
    query = (
        Query("discover", join)
        .set_select(
            [
                Column("group_id", ev),
                Column("status", gm),
                Function("avg", [Column("retention_days", ev)], "avg"),
            ]
        )
        .set_groupby([Column("group_id", ev), Column("status", gm)])
        .set_where(
            [
                Condition(Column("project_id", ev), Op.EQ, self.project_id),
                Condition(Column("project_id", gm), Op.EQ, self.project_id),
                Condition(Column("timestamp", ev), Op.GTE, self.base_time),
                Condition(Column("timestamp", ev), Op.LT, self.next_time),
            ]
        )
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200
    assert data["data"] == []
def test_escape_edge_cases(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select([Function("count", [], "times_seen")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("environment"), Op.EQ, "\\' \n \\n \\"),
            ]
        )
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def test_sub_query(self) -> None:
    inner_query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
    )

    query = (
        Query("discover", inner_query)
        .set_select([Function("avg", [Column("count")], "avg_count")])
        .set_orderby(
            [OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)]
        )
        .set_limit(1000)
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert data["data"] == [{"avg_count": 1.0}]
def test_entity_validate_match(query: Query, exception: Optional[Exception]) -> None:
    query = query.set_where(
        [
            Condition(Column("required1"), Op.IN, [1, 2, 3]),
            Condition(Column("required2"), Op.EQ, 1),
            Condition(Column("time"), Op.GTE, BEFORE),
            Condition(Column("time"), Op.LT, AFTER),
        ]
    )

    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_match(query, SEARCHER)
    else:
        validate_match(query, SEARCHER)
def test_columns_with_entities(
    column_name: str,
    entity: Any,
    translated: str,
    exception: Optional[Exception],
) -> None:
    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            Column(column_name, entity)
    else:
        exp = Column(column_name, entity)
        assert exp.name == column_name
        assert exp.entity == entity
        assert TRANSLATOR.visit(exp) == translated
def test_columns(
    column_name: str,
    valid: tuple[str, Optional[str], Optional[str], Optional[str]],
    translated: str,
    exception: Optional[Exception],
) -> None:
    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            Column(column_name)
    else:
        exp = Column(column_name)
        assert exp.name == valid[0]
        assert exp.subscriptable == valid[1]
        assert exp.key == valid[2]
        assert TRANSLATOR.visit(exp) == translated
def test_basic(self) -> None:
    now = datetime.now()
    self._insert_event_for_time(now)

    query = (
        Query(dataset="events", match=Entity("events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(Column("timestamp"), Op.GTE, now - timedelta(days=1)),
                Condition(Column("timestamp"), Op.LT, now + timedelta(days=1)),
            ]
        )
    )

    result = snuba.raw_snql_query(query)
    assert len(result["data"]) == 1
    assert result["data"][0] == {"count": 1, "project_id": self.project.id}
def test_correct_times_seen_aggregate(self):
    query = _make_query(
        "statsPeriod=6h&interval=10m&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1},
        True,
    )
    assert Function("count()", [Column("times_seen")], "times_seen") in query.select_params

    query = _make_query(
        "statsPeriod=6h&interval=1d&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1},
        True,
    )
    assert Function("sum", [Column("times_seen")], "times_seen") in query.select_params
def test_correct_reason_mapping(self):
    query = _make_query(
        "statsPeriod=4d&interval=1d&groupBy=category&reason=spike_protection&field=sum(quantity)",
        {"organization_id": 1},
    )
    assert Condition(Column("reason"), Op.IN, ["smart_rate_limit"]) in query.conditions
def parse_extension_condition(
    col: str, values: Any, always_in: bool = False
) -> Optional[Condition]:
    """
    Create an SDK condition using the values passed as extensions in the legacy API.

    :param col: The column that the automatic condition applies to.
    :type col: str
    :param values: The RHS values of the condition. Could be a single scalar
        or a sequence of values.
    :type values: Any
    :param always_in: Some conditions always use an IN condition, even if
        there is a single value.
    :type always_in: bool
    """
    column = Column(col)
    if isinstance(values, int):
        if always_in:
            values = (values,)
        else:
            return Condition(column, Op.EQ, values)

    if isinstance(values, (list, tuple)):
        rhs: Sequence[Any] = tuple(map(parse_scalar, values))
        return Condition(column, Op.IN, rhs)

    return None
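# A hypothetical usage sketch of parse_extension_condition (not part of the
# original module), assuming parse_scalar leaves plain integers unchanged:
def _example_parse_extension_condition() -> None:
    # A single scalar becomes an EQ condition by default.
    assert parse_extension_condition("project_id", 1) == Condition(
        Column("project_id"), Op.EQ, 1
    )
    # With always_in=True, the scalar is wrapped in a tuple and emitted as IN.
    assert parse_extension_condition("project_id", 1, always_in=True) == Condition(
        Column("project_id"), Op.IN, (1,)
    )
    # Sequences always become IN conditions.
    assert parse_extension_condition("project_id", [1, 2]) == Condition(
        Column("project_id"), Op.IN, (1, 2)
    )
    # Anything that is neither an int nor a sequence yields no condition.
    assert parse_extension_condition("project_id", "unsupported") is None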
def test_entity_validate_match(
    conditions: ConditionGroup,
    entity: Entity,
    exception: Optional[Exception],
) -> None:
    query = Query(
        dataset="test",
        match=entity,
        select=[Column("test1"), Column("required1")],
    )
    query = query.set_where(conditions)

    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_required_columns(query)
    else:
        validate_required_columns(query)
def column(self, name: str) -> Column:
    """Given an unresolved sentry name, return a snql column.

    :param name: The unresolved sentry name.
    """
    resolved_column = self.resolve_column_name(name)
    return Column(resolved_column)
def aliased_column(self, name: str, alias: str) -> SelectType:
    """Given an unresolved sentry name and an expected alias, return a snql
    column that will be aliased to the expected alias.

    :param name: The unresolved sentry name.
    :param alias: The expected alias in the result.
    """
    # TODO: This method should use an aliased column from the SDK once
    # that is available, to skip the aliasing hacks we currently have
    # to do.
    resolved = self.resolve_column_name(name)
    column = Column(resolved)

    # If the expected alias is identical to the resolved snuba column,
    # there is no need to do this aliasing trick.
    #
    # Additionally, tags of the form `tags[...]` can't be aliased again
    # because it confuses the sdk.
    if alias == resolved:
        return column

    # If the expected alias differs from the resolved snuba column,
    # make sure to alias the expression appropriately so we get back
    # the column with the correct name.
    return AliasedExpression(column, alias)
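# A hypothetical sketch contrasting column() and aliased_column() (the mapping
# below is assumed, consistent with the orderby test where "user.email"
# resolves to Column("email")):
#
#     self.column("user.email")
#         # -> Column("email")
#     self.aliased_column("user.email", "user.email")
#         # -> AliasedExpression(Column("email"), "user.email")
#     self.aliased_column("environment", "environment")
#         # -> Column("environment"); alias == resolved name, so no wrapping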
def test_correct_outcome_mapping(self):
    query = _make_query(
        "statsPeriod=4d&interval=1d&groupBy=category&outcome=accepted&field=sum(quantity)",
        {"organization_id": 1},
    )
    assert Condition(Column("outcome"), Op.IN, [Outcome.ACCEPTED]) in query.conditions
def _check_releases_have_health_data(
    organization_id: int,
    project_ids: List[int],
    release_versions: List[str],
    start: datetime,
    end: datetime,
) -> Set[str]:
    """
    Returns a set of all release versions that have health data within a given period of time.
    """
    if not release_versions:
        return set()

    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[Column("release")],
        groupby=[Column("release")],
        where=[
            Condition(Column("started"), Op.GTE, start),
            Condition(Column("started"), Op.LT, end),
            Condition(Column("org_id"), Op.EQ, organization_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("release"), Op.IN, release_versions),
        ],
    )
    data = snuba.raw_snql_query(
        query, referrer="snuba.sessions.check_releases_have_health_data"
    )["data"]
    return {row["release"] for row in data}
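# A minimal call sketch (the IDs and release versions are hypothetical, shown
# only for illustration):
#
#     healthy = _check_releases_have_health_data(
#         organization_id=1,
#         project_ids=[1, 2],
#         release_versions=["app@1.0.0", "app@1.1.0"],
#         start=datetime.utcnow() - timedelta(days=90),
#         end=datetime.utcnow(),
#     )
#     # `healthy` is the subset of the given versions that reported sessions
#     # in the window, e.g. {"app@1.0.0"}.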
class TimeseriesQueryBuilder(QueryFilter):  # type: ignore
    time_column = Column("time")

    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        granularity: int,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        equations: Optional[List[str]] = None,
        functions_acl: Optional[List[str]] = None,
        limit: Optional[int] = 10000,
    ):
        super().__init__(
            dataset,
            params,
            auto_fields=False,
            functions_acl=functions_acl,
            equation_config={"auto_add": True, "aggregates_only": True},
        )
        self.where, self.having = self.resolve_conditions(
            query, use_aggregate_conditions=False
        )

        self.limit = None if limit is None else Limit(limit)

        # params depends on parse_query and on conditions being resolved
        # first, since there may be projects in the conditions
        self.where += self.resolve_params()
        self.columns = self.resolve_select(selected_columns, equations)
        self.granularity = Granularity(granularity)

        # This is a timeseries, so the groupby will always be time
        self.groupby = [self.time_column]

    @property
    def select(self) -> List[SelectType]:
        if not self.aggregates:
            raise InvalidSearchQuery("Cannot query a timeseries without a Y-Axis")
        # Casting for now since QueryFields/QueryFilter are only partially typed
        return cast(List[SelectType], self.aggregates)

    def get_snql_query(self) -> Query:
        return Query(
            dataset=self.dataset.value,
            match=Entity(self.dataset.value),
            select=self.select,
            where=self.where,
            having=self.having,
            groupby=self.groupby,
            orderby=[OrderBy(self.time_column, Direction.ASC)],
            granularity=self.granularity,
            limit=self.limit,
        )
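# A usage sketch (the parameter values are hypothetical): build an hourly
# error-count timeseries and turn it into a snql Query. `params` carries the
# same keys the test setUp above uses (project_id/start/end).
#
#     builder = TimeseriesQueryBuilder(
#         Dataset.Discover,
#         params={"project_id": [1], "start": start, "end": end},
#         granularity=3600,
#         query="event.type:error",
#         selected_columns=["count()"],
#     )
#     snql = builder.get_snql_query()  # grouped and ordered by the time column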
def get_conditions(self, query: QueryDict, params: Mapping[Any, Any]) -> List[Any]:
    query_conditions = [
        Condition(Column("timestamp"), Op.GTE, self.start),
        Condition(Column("timestamp"), Op.LT, self.end),
    ]
    for filter_name in DIMENSION_MAP:
        raw_filter = query.getlist(filter_name, [])
        resolved_filter = DIMENSION_MAP[filter_name].resolve_filter(raw_filter)
        if len(resolved_filter) > 0:
            query_conditions.append(Condition(Column(filter_name), Op.IN, resolved_filter))
    if "project_id" in params:
        query_conditions.append(
            Condition(Column("project_id"), Op.IN, params["project_id"]),
        )
    if "organization_id" in params:
        query_conditions.append(
            Condition(Column("org_id"), Op.EQ, params["organization_id"]),
        )
    return query_conditions
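# A sketch of the output shape (the request is hypothetical, and it assumes
# "outcome" is a DIMENSION_MAP key that resolves "accepted" to
# Outcome.ACCEPTED, as the outcome-mapping test elsewhere in this file
# suggests):
#
#     self.get_conditions(
#         QueryDict("outcome=accepted"), {"organization_id": 1, "project_id": [1]}
#     )
#     # -> [
#     #     Condition(Column("timestamp"), Op.GTE, self.start),
#     #     Condition(Column("timestamp"), Op.LT, self.end),
#     #     Condition(Column("outcome"), Op.IN, [Outcome.ACCEPTED]),
#     #     Condition(Column("project_id"), Op.IN, [1]),
#     #     Condition(Column("org_id"), Op.EQ, 1),
#     # ]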
def test_filter_keys(self):
    query = _make_query(
        "statsPeriod=6h&interval=10m&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1},
        True,
    )
    assert Condition(Column("org_id"), Op.EQ, 1) in query.conditions

    query = _make_query(
        "statsPeriod=6h&interval=1d&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1, "project_id": [1, 2, 3, 4, 5]},
        True,
    )
    assert Condition(Column("org_id"), Op.EQ, 1) in query.conditions
    assert Condition(Column("project_id"), Op.IN, [1, 2, 3, 4, 5]) in query.conditions
def test_cache(self):
    """Minimal test to verify that use_cache works."""
    results = snuba.raw_snql_query(
        Query(
            "events",
            Entity("events"),
            select=[Column("event_id")],
            where=[
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(Column("timestamp"), Op.GTE, timezone.now() - timedelta(days=1)),
                Condition(Column("timestamp"), Op.LT, timezone.now()),
            ],
            limit=Limit(1),
        ),
        use_cache=True,
    )
    assert results["data"] == []
def build_key_errors(interval, project):
    start, stop = interval

    # Take the 3 most frequently occurring events
    query = Query(
        dataset=Dataset.Events.value,
        match=Entity("events"),
        select=[Column("group_id"), Function("count", [])],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("group_id")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    query_result = raw_snql_query(query, referrer="reports.key_errors")
    key_errors = query_result["data"]
    return [(e["group_id"], e["count()"]) for e in key_errors]
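# A usage sketch (the values are hypothetical): summarize a reporting interval
# for a project. Each returned tuple pairs a group id with its event count,
# ordered from most to least frequent.
#
#     interval = (report_start, report_start + timedelta(days=6))
#     top_errors = build_key_errors(interval, project)
#     # e.g. [(12, 42), (7, 13), (31, 2)]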
def test_correct_category_mapping(self):
    query = _make_query(
        "statsPeriod=4d&interval=1d&category=error&field=sum(quantity)",
        {"organization_id": 1},
    )
    assert (
        Condition(
            Column("category"),
            Op.IN,
            [DataCategory.DEFAULT, DataCategory.ERROR, DataCategory.SECURITY],
        )
        in query.conditions
    )
def test_project_in_condition_filters(self):
    # TODO(snql-boolean): Update this to match the corresponding test in test_filter
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        f"project:{project1.slug}",
        selected_columns=["environment"],
    )
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("project_id"), Op.EQ, project1.id),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ],
    )
def __init__(
    self,
    query: QueryDict,
    params: Mapping[Any, Any],
    allow_minute_resolution: Optional[bool] = True,
):
    raw_fields = query.getlist("field", [])
    raw_groupby = query.getlist("groupBy", [])
    if len(raw_fields) == 0:
        raise InvalidField('At least one "field" is required.')

    self.fields = {}
    self.query: List[Any] = []  # not used but needed for compat with sessions logic
    allowed_resolution = (
        AllowedResolution.one_minute if allow_minute_resolution else AllowedResolution.one_hour
    )
    start, end, rollup = get_constrained_date_range(query, allowed_resolution)
    self.dataset, self.match = _outcomes_dataset(rollup)
    self.rollup = rollup
    self.start = start
    self.end = end

    self.select_params = []
    for key in raw_fields:
        if key not in COLUMN_MAP:
            raise InvalidField(f'Invalid field: "{key}"')
        field = COLUMN_MAP[key]
        self.select_params.append(field.select_params(self.dataset))
        self.fields[key] = field

    self.groupby = []
    for key in raw_groupby:
        if key not in GROUPBY_MAP:
            raise InvalidField(f'Invalid groupBy: "{key}"')
        self.groupby.append(GROUPBY_MAP[key])

    if len(query.getlist("category", [])) == 0 and "category" not in raw_groupby:
        raise InvalidQuery("Query must have category as groupby or filter")

    query_columns = set()
    for field in self.fields.values():
        query_columns.update(field.get_snuba_columns(raw_groupby))
    for groupby in self.groupby:
        query_columns.update(groupby.get_snuba_columns())
    self.query_columns = list(query_columns)

    query_groupby = set()
    for groupby in self.groupby:
        query_groupby.update(groupby.get_snuba_groupby())
    self.query_groupby = list(query_groupby)

    self.group_by = []
    for key in self.query_groupby:
        self.group_by.append(Column(key))

    self.conditions = self.get_conditions(query, params)
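# A construction sketch mirroring the _make_query tests above (the enclosing
# class name, QueryDefinition, is assumed from context; the query string is an
# example value):
#
#     query_def = QueryDefinition(
#         QueryDict("statsPeriod=4d&interval=1d&category=error&field=sum(quantity)"),
#         {"organization_id": 1},
#     )
#     query_def.conditions  # the timestamp/org_id conditions from
#                           # get_conditions plus the resolved category filter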
def test_orderby_duplicate_columns(self):
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        selected_columns=["user.email", "user.email"],
        orderby=["user.email"],
    )
    self.assertCountEqual(
        query.orderby,
        [OrderBy(Column("email"), Direction.ASC)],
    )