def parse_extension_condition(col: str, values: Any, always_in: bool = False) -> Optional[Condition]:
    """Build an SDK condition from values passed as extensions in the legacy API.

    :param col: The column the automatic condition applies to.
    :param values: RHS of the condition; a single scalar or a sequence of values.
    :param always_in: Force an IN condition even when there is a single value.
    :returns: The equivalent SDK ``Condition``, or ``None`` when ``values`` is
        neither an int nor a list/tuple.
    """
    column = Column(col)
    if isinstance(values, int):
        if not always_in:
            return Condition(column, Op.EQ, values)
        # Promote the scalar to a one-element tuple so the IN branch handles it.
        values = (values,)
    if isinstance(values, (list, tuple)):
        parsed: Sequence[Any] = tuple(parse_scalar(v) for v in values)
        return Condition(column, Op.IN, parsed)
    return None
def test_environment_filter(self):
    # A single environment value becomes an equality condition.
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "environment:prod",
        ["environment"],
    )
    expected = [Condition(Column("environment"), Op.EQ, "prod")]
    expected.extend(self.default_conditions)
    self.assertCountEqual(query.where, expected)
    query.get_snql_query().validate()

    # A list of environments becomes an IN condition.
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        "environment:[dev, prod]",
        ["environment"],
    )
    expected = [Condition(Column("environment"), Op.IN, ["dev", "prod"])]
    expected.extend(self.default_conditions)
    self.assertCountEqual(query.where, expected)
    query.get_snql_query().validate()
def get_snql_function_aliases(trend_columns: TrendColumns, trend_type: str) -> Dict[str, Alias]:
    """Construct a dict of aliases this is because certain conditions behave
    differently depending on the trend type like trend_percentage and
    trend_difference
    """
    return {
        "trend_percentage()": Alias(
            lambda aggregate_filter: Condition(
                trend_columns["trend_percentage"],
                # For IMPROVED trends the comparison operator is flipped via
                # CORRESPONDENCE_MAP (e.g. ">" becomes "<") since improvements
                # move in the opposite direction of regressions.
                Op(
                    CORRESPONDENCE_MAP[aggregate_filter.operator]
                    if trend_type == IMPROVED
                    else aggregate_filter.operator
                ),
                # Percentages are centred on 1 (no change); the filter value is
                # negated for IMPROVED so the threshold applies symmetrically.
                1 + (aggregate_filter.value.value * (-1 if trend_type == IMPROVED else 1)),
            ),
            ["percentage", "transaction.duration"],
            trend_columns["trend_percentage"],
        ),
        "trend_difference()": Alias(
            lambda aggregate_filter: Condition(
                trend_columns["trend_difference"],
                # Same operator flip as trend_percentage for IMPROVED trends.
                Op(
                    CORRESPONDENCE_MAP[aggregate_filter.operator]
                    if trend_type == IMPROVED
                    else aggregate_filter.operator
                ),
                # Differences are centred on 0, so only the sign is flipped.
                -1 * aggregate_filter.value.value
                if trend_type == IMPROVED
                else aggregate_filter.value.value,
            ),
            ["minus", "transaction.duration"],
            trend_columns["trend_difference"],
        ),
        "confidence()": Alias(
            lambda aggregate_filter: Condition(
                trend_columns["t_test"],
                # NOTE: unlike the two aliases above, confidence flips on
                # REGRESSION rather than IMPROVED.
                Op(
                    CORRESPONDENCE_MAP[aggregate_filter.operator]
                    if trend_type == REGRESSION
                    else aggregate_filter.operator
                ),
                -1 * aggregate_filter.value.value
                if trend_type == REGRESSION
                else aggregate_filter.value.value,
            ),
            None,
            trend_columns["t_test"],
        ),
        "count_percentage()": Alias(
            # count_percentage is trend-type agnostic: operator and value are
            # passed through unchanged.
            lambda aggregate_filter: Condition(
                trend_columns["count_percentage"],
                Op(aggregate_filter.operator),
                aggregate_filter.value.value,
            ),
            ["percentage", "count"],
            trend_columns["count_percentage"],
        ),
    }
def test_simple(self) -> None:
    query = Query(
        dataset="sessions",
        match=Entity("org_sessions"),
        select=[Column("org_id"), Column("project_id")],
        groupby=[Column("org_id"), Column("project_id")],
        where=[
            Condition(Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)),
            Condition(Column("started"), Op.LT, datetime.utcnow()),
        ],
        granularity=Granularity(3600),
    )
    response = self.app.post("/sessions/snql", data=query.snuba())
    payload = json.loads(response.data)
    assert response.status_code == 200, response.data

    rows = payload["data"]
    assert len(rows) == 2
    # Both projects belong to the same org; rows come back per project.
    for row, expected_project in zip(rows, [self.project_id, self.project_id2]):
        assert row["org_id"] == self.org_id
        assert row["project_id"] == expected_project
def test_join_query(self) -> None:
    events = Entity("events", "ev")
    groups = Entity("groupedmessage", "gm")
    query = (
        Query("discover", Join([Relationship(events, "grouped", groups)]))
        .set_select(
            [
                Column("group_id", events),
                Column("status", groups),
                Function("avg", [Column("retention_days", events)], "avg"),
            ]
        )
        .set_groupby([Column("group_id", events), Column("status", groups)])
        .set_where(
            [
                Condition(Column("project_id", events), Op.EQ, self.project_id),
                Condition(Column("project_id", groups), Op.EQ, self.project_id),
                Condition(Column("timestamp", events), Op.GTE, self.base_time),
                Condition(Column("timestamp", events), Op.LT, self.next_time),
            ]
        )
    )
    response = self.post("/discover/snql", data=query.snuba())
    payload = json.loads(response.data)
    assert response.status_code == 200
    assert payload["data"] == []
def test_join_validate_match(
    conditions: ConditionGroup,
    entity: Entity,
    exception: Optional[Exception],
) -> None:
    # The secondary entity of the join, with all of its required columns
    # supplied, so only the parametrized `conditions` can trigger a failure.
    joined = Entity("test_b", "tb", None, SCHEMA)
    where = [
        Condition(Column("required1", joined), Op.IN, [1, 2, 3]),
        Condition(Column("required2", joined), Op.EQ, 1),
        Condition(Column("time", joined), Op.GTE, BEFORE),
        Condition(Column("time", joined), Op.LT, AFTER),
    ] + list(conditions)
    query = Query(
        dataset="test",
        match=Join([Relationship(entity, "has", joined)]),
        select=[Column("test1", entity), Column("required1", joined)],
        where=where,
    )
    if exception is None:
        validate_required_columns(query)
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_required_columns(query)
def test_sub_query(self) -> None:
    # Inner query: count events per (project, custom tag).
    inner_query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
    )
    # Outer query averages the inner counts.
    avg_count = Function("avg", [Column("count")], "avg_count")
    query = (
        Query("discover", inner_query)
        .set_select([avg_count])
        .set_orderby([OrderBy(avg_count, Direction.ASC)])
        .set_limit(1000)
    )
    response = self.post("/discover/snql", data=query.snuba())
    payload = json.loads(response.data)
    assert response.status_code == 200, payload
    assert payload["data"] == [{"avg_count": 1.0}]
def test_project_alias_column_with_project_condition(self):
    project1 = self.create_project()
    project2 = self.create_project()
    self.params["project_id"] = [project1.id, project2.id]
    query = QueryBuilder(
        Dataset.Discover,
        self.params,
        f"project:{project1.slug}",
        selected_columns=["project"],
    )
    expected_where = [
        Condition(Column("project_id"), Op.EQ, project1.id),
        Condition(Column("timestamp"), Op.GTE, self.start),
        Condition(Column("timestamp"), Op.LT, self.end),
    ]
    self.assertCountEqual(query.where, expected_where)
    # Because of the condition on project there should only be 1 project in
    # the transform.
    expected_select = [
        Function(
            "transform",
            [
                Column("project_id"),
                [project1.id],
                [project1.slug],
                "",
            ],
            "project",
        )
    ]
    self.assertCountEqual(query.select, expected_select)
def _check_releases_have_health_data(
    organization_id: int,
    project_ids: List[int],
    release_versions: List[str],
    start: datetime,
    end: datetime,
) -> Set[str]:
    """Return the subset of ``release_versions`` that reported health data
    in the ``[start, end)`` window for the given org and projects."""
    if not release_versions:
        return set()

    where = [
        Condition(Column("started"), Op.GTE, start),
        Condition(Column("started"), Op.LT, end),
        Condition(Column("org_id"), Op.EQ, organization_id),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("release"), Op.IN, release_versions),
    ]
    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[Column("release")],
        groupby=[Column("release")],
        where=where,
    )
    result = snuba.raw_snql_query(
        query, referrer="snuba.sessions.check_releases_have_health_data"
    )
    return {row["release"] for row in result["data"]}
def test_environment_param(self):
    # An empty-string environment means "no environment" and is stored as
    # NULL, so it should be OR'ed with the named environment.
    self.params["environment"] = ["", "prod"]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
    expected = list(self.default_conditions)
    expected.append(
        Or(
            [
                Condition(Column("environment"), Op.IS_NULL),
                Condition(Column("environment"), Op.EQ, "prod"),
            ]
        )
    )
    self.assertCountEqual(query.where, expected)
    query.get_snql_query().validate()

    # Two named environments collapse into a single IN condition.
    self.params["environment"] = ["dev", "prod"]
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
    expected = list(self.default_conditions)
    expected.append(Condition(Column("environment"), Op.IN, ["dev", "prod"]))
    self.assertCountEqual(query.where, expected)
    query.get_snql_query().validate()
def setUp(self):
    self.start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
    self.end = datetime.datetime(2015, 5, 19, 10, 15, 1, tzinfo=timezone.utc)
    self.projects = [1, 2, 3]
    self.params = dict(
        project_id=self.projects,
        start=self.start,
        end=self.end,
    )
    # These conditions should always be on a query when self.params is passed.
    self.default_conditions = [
        Condition(Column("timestamp"), Op.GTE, self.start),
        Condition(Column("timestamp"), Op.LT, self.end),
        Condition(Column("project_id"), Op.IN, self.projects),
    ]
def test_project_alias_column(self):
    # TODO(snql-boolean): Update this to match the corresponding test in test_filter
    project1 = self.create_project()
    project2 = self.create_project()
    project_ids = [project1.id, project2.id]
    self.params["project_id"] = project_ids
    query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["project"])
    self.assertCountEqual(
        query.where,
        [
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ],
    )
    # With no project filter the transform covers both projects.
    self.assertCountEqual(
        query.select,
        [
            Function(
                "transform",
                [
                    Column("project_id"),
                    project_ids,
                    [project1.slug, project2.slug],
                    "",
                ],
                "project",
            )
        ],
    )
def _environment_filter_converter(
    self,
    search_filter: SearchFilter,
    _: str,
) -> WhereType:
    """Convert an ``environment`` search filter into a Snuba condition,
    treating the empty-string environment as NULL."""
    # conditions added to env_conditions can be OR'ed
    env_conditions = []
    value = search_filter.value.value
    # Normalise to a set to de-duplicate, whether we got a scalar or a list.
    values = set(value if isinstance(value, (list, tuple)) else [value])
    # sorted for consistency
    values = sorted(f"{value}" for value in values)
    environment = self.column("environment")
    # the "no environment" environment is null in snuba
    if "" in values:
        values.remove("")
        operator = Op.IS_NULL if search_filter.operator == "=" else Op.IS_NOT_NULL
        env_conditions.append(Condition(environment, operator))
    if len(values) == 1:
        # Single named environment: plain (in)equality. `pop()` takes the
        # remaining element off the list.
        operator = Op.EQ if search_filter.operator in EQUALITY_OPERATORS else Op.NEQ
        env_conditions.append(Condition(environment, operator, values.pop()))
    elif values:
        operator = Op.IN if search_filter.operator in EQUALITY_OPERATORS else Op.NOT_IN
        env_conditions.append(Condition(environment, operator, values))
    # NOTE(review): if the incoming value were an empty list, env_conditions
    # would be empty and the subscript below would raise IndexError —
    # presumably callers never pass an empty filter value; confirm upstream.
    if len(env_conditions) > 1:
        return Or(conditions=env_conditions)
    else:
        return env_conditions[0]
def _get_group_filters(group: Group):
    """Snuba conditions restricting a query to ``group``'s events."""
    # Upper bound is exclusive, so push it one second past last_seen.
    lower_bound = group.first_seen
    upper_bound = group.last_seen + datetime.timedelta(seconds=1)
    return [
        Condition(Column("timestamp"), Op.GTE, lower_bound),
        Condition(Column("timestamp"), Op.LT, upper_bound),
        Condition(Column("project_id"), Op.EQ, group.project_id),
        Condition(Column("group_id"), Op.EQ, group.id),
    ]
def test_escape_edge_cases(self) -> None:
    # Environment value mixing backslashes, quotes and newlines to exercise
    # the server-side escaping.
    tricky_environment = "\\' \n \\n \\"
    query = (
        Query("events", Entity("events"))
        .set_select([Function("count", [], "times_seen")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("environment"), Op.EQ, tricky_environment),
            ]
        )
    )
    response = self.post("/events/snql", data=query.snuba())
    payload = json.loads(response.data)
    assert response.status_code == 200, payload
def _get_group_filters(group: Group):
    """Snuba conditions restricting a query to ``group``'s events."""
    return [
        Condition(Column("project_id"), Op.EQ, group.project_id),
        Condition(Column("group_id"), Op.EQ, group.id),
        # XXX(markus): These timestamp bounds assume last_seen stays in sync
        # with max(timestamp) in Snuba, which can be false. In fact we know
        # merge/unmerge can leave last_seen permanently wrong:
        # https://github.com/getsentry/sentry/issues/25673
        #
        # They are added anyway because the Snuba query API requires them, and
        # because they bring a significant performance boost.
        Condition(Column("timestamp"), Op.GTE, group.first_seen),
        Condition(Column("timestamp"), Op.LT, group.last_seen + datetime.timedelta(seconds=1)),
    ]
def test_entity_validate_match(query: Query, exception: Optional[Exception]) -> None:
    # Satisfy all required columns so only the parametrized query content can
    # trigger a validation failure.
    required_conditions = [
        Condition(Column("required1"), Op.IN, [1, 2, 3]),
        Condition(Column("required2"), Op.EQ, 1),
        Condition(Column("time"), Op.GTE, BEFORE),
        Condition(Column("time"), Op.LT, AFTER),
    ]
    query = query.set_where(required_conditions)
    if exception is None:
        validate_match(query, SEARCHER)
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_match(query, SEARCHER)
def test_basic(self) -> None:
    now = datetime.now()
    self._insert_event_for_time(now)
    # Count events for the project within +/- one day of the inserted event.
    query = (
        Query(dataset="events", match=Entity("events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(Column("timestamp"), Op.GTE, now - timedelta(days=1)),
                Condition(Column("timestamp"), Op.LT, now + timedelta(days=1)),
            ]
        )
    )
    result = snuba.raw_snql_query(query)
    rows = result["data"]
    assert len(rows) == 1
    assert rows[0] == {"count": 1, "project_id": self.project.id}
def test_orderby(self) -> None:
    self.project_id3 = next(self.id_iter)
    self.org_id2 = next(self.id_iter)
    self.generate_session_events(self.org_id2, self.project_id3)

    def run_query(q):
        # Post the query and return the result rows after basic checks.
        response = self.app.post("/sessions/snql", data=q.snuba())
        payload = json.loads(response.data)
        assert response.status_code == 200, response.data
        return payload["data"]

    query = Query(
        dataset="sessions",
        match=Entity("org_sessions"),
        select=[Column("org_id"), Column("project_id")],
        groupby=[Column("org_id"), Column("project_id")],
        where=[
            Condition(Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)),
            Condition(Column("started"), Op.LT, datetime.utcnow()),
        ],
        granularity=Granularity(3600),
        orderby=[OrderBy(Column("org_id"), Direction.ASC)],
    )

    rows = run_query(query)
    assert len(rows) == 3
    expected_ascending = [
        (self.org_id, self.project_id),
        (self.org_id, self.project_id2),
        (self.org_id2, self.project_id3),
    ]
    for row, (org_id, project_id) in zip(rows, expected_ascending):
        assert row["org_id"] == org_id
        assert row["project_id"] == project_id

    query = query.set_orderby([OrderBy(Column("org_id"), Direction.DESC)])
    rows = run_query(query)
    assert len(rows) == 3
    expected_descending = [
        (self.org_id2, self.project_id3),
        (self.org_id, self.project_id),
        (self.org_id, self.project_id2),
    ]
    for row, (org_id, project_id) in zip(rows, expected_descending):
        assert row["org_id"] == org_id
        assert row["project_id"] == project_id
def test_correct_reason_mapping(self):
    # The public "spike_protection" reason maps to the internal
    # "smart_rate_limit" value in the generated query.
    query = _make_query(
        "statsPeriod=4d&interval=1d&groupBy=category&reason=spike_protection&field=sum(quantity)",
        {"organization_id": 1},
    )
    expected = Condition(Column("reason"), Op.IN, ["smart_rate_limit"])
    assert expected in query.conditions
def _get_full_hierarchical_hashes(group: Group, hash: str) -> Optional[Sequence[str]]:
    """Return the full hierarchical_hashes array of an event in ``group``
    containing ``hash``, or ``None`` when no such event exists."""
    contains_hash = Condition(
        Function("has", [Column("hierarchical_hashes"), hash]),
        Op.EQ,
        1,
    )
    query = (
        Query("events", Entity("events"))
        .set_select([Column("hierarchical_hashes")])
        .set_where(_get_group_filters(group) + [contains_hash])
    )
    data = snuba.raw_snql_query(query, referrer="group_split.get_full_hierarchical_hashes")["data"]
    if not data:
        return None
    return data[0]["hierarchical_hashes"]
def test_invalid_query() -> None:
    # Both a non-string and an empty string are invalid datasets.
    for bad_dataset in (1, ""):
        with pytest.raises(InvalidQueryError, match=re.escape("queries must have a valid dataset")):
            Query(dataset=bad_dataset, match=Entity("events"))  # type: ignore

    with pytest.raises(InvalidQueryError, match=re.escape("queries must have a valid Entity")):
        Query(dataset="discover", match="events")  # type: ignore

    # An AliasedExpression is not a valid LHS for a condition.
    with pytest.raises(
        InvalidConditionError,
        match=re.escape(
            "invalid condition: LHS of a condition must be a Column, CurriedFunction or Function, not <class 'snuba_sdk.aliased_expression.AliasedExpression'>"
        ),
    ):
        (
            Query("discover", Entity("events"))
            .set_select([AliasedExpression(Column("transaction"), "tn")])
            .set_where(
                [Condition(AliasedExpression(Column("project_id"), "pi"), Op.IN, (1,))]  # type: ignore
            )
        )
def get_event_stats(
    query_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    zerofill_results: bool,
    comparison_delta: Optional[datetime] = None,
) -> SnubaTSResult:
    """Build and run a Discover timeseries query for span performance stats,
    zerofilling the result buckets.

    NOTE(review): `zerofill_results` and `comparison_delta` are accepted but
    never read in this body — presumably required by the caller's callback
    signature; confirm.
    """
    with sentry_sdk.start_span(
            op="discover.discover", description="timeseries.filter_transform"):
        builder = TimeseriesQueryBuilder(
            Dataset.Discover,
            params,
            rollup,
            query=query,
            selected_columns=query_columns,
            functions_acl=[
                "array_join", "percentileArray", "sumArray"
            ],
        )
        span_op_column = builder.resolve_function(
            "array_join(spans_op)")
        span_group_column = builder.resolve_function(
            "array_join(spans_group)")
        # Adding spans.op and spans.group to the group by because
        # We need them in the query to help the array join optimizer
        # in snuba take effect but the TimeseriesQueryBuilder
        # removes all non aggregates from the select clause.
        builder.groupby.extend([span_op_column, span_group_column])
        # NOTE(review): `span` is a free variable captured from the enclosing
        # scope, not a parameter of this function — this body only works as a
        # closure; confirm at the definition site.
        builder.add_conditions([
            Condition(
                Function("tuple", [span_op_column, span_group_column]),
                Op.IN,
                Function("tuple", [Function("tuple", [span.op, span.group])]),
            ),
        ])
        snql_query = builder.get_snql_query()
        results = raw_snql_query(
            snql_query, "api.organization-events-spans-performance-stats")
    with sentry_sdk.start_span(
            op="discover.discover", description="timeseries.transform_results"):
        # Fill in empty time buckets with zero values over [start, end).
        result = discover.zerofill(
            results["data"],
            params["start"],
            params["end"],
            rollup,
            "time",
        )
    return SnubaTSResult({"data": result}, params["start"], params["end"], rollup)
def test_correct_outcome_mapping(self):
    # The "accepted" outcome string resolves to the Outcome.ACCEPTED enum
    # value in the generated query.
    query = _make_query(
        "statsPeriod=4d&interval=1d&groupBy=category&outcome=accepted&field=sum(quantity)",
        {"organization_id": 1},
    )
    expected = Condition(Column("outcome"), Op.IN, [Outcome.ACCEPTED])
    assert expected in query.conditions
def __init__(
    self,
    num_buckets: int,
    histogram_column: str,
    histogram_rows: Optional[int],
    histogram_params: HistogramParams,
    key_column: Optional[str],
    field_names: Optional[List[Union[str, Any, None]]],
    groupby: Optional[List[str]],
    *args: Any,
    **kwargs: Any,
):
    """Configure a query builder that buckets ``histogram_column`` into
    ``num_buckets`` bins described by ``histogram_params``.

    :param num_buckets: Number of histogram bins per group.
    :param histogram_column: Column the histogram is computed over.
    :param histogram_rows: Row budget per group; defaults to the number of
        selected columns when ``None``.
    :param histogram_params: Bin start offset and bucket size.
    :param key_column: Optional column that splits the histogram per key.
    :param field_names: Candidate values for ``key_column``; non-strings are
        filtered out.
    :param groupby: Extra group-by columns stored for later use.
    """
    kwargs["functions_acl"] = kwargs.get("functions_acl", []) + self.base_function_acl
    super().__init__(*args, **kwargs)
    self.additional_groupby = groupby
    selected_columns = kwargs["selected_columns"]
    resolved_histogram = self.resolve_column(histogram_column)
    # Reset&Ignore the columns from the QueryBuilder
    self.aggregates: List[CurriedFunction] = []
    self.columns = [self.resolve_column("count()"), resolved_histogram]
    if key_column is not None and field_names is not None:
        # Only string field names are valid key values.
        key_values: List[str] = [
            field for field in field_names if isinstance(field, str)
        ]
        self.where.append(
            Condition(self.resolve_column(key_column), Op.IN, key_values))
    # make sure to bound the bins to get the desired range of results
    min_bin = histogram_params.start_offset
    self.where.append(Condition(resolved_histogram, Op.GTE, min_bin))
    max_bin = histogram_params.start_offset + histogram_params.bucket_size * num_buckets
    self.where.append(Condition(resolved_histogram, Op.LTE, max_bin))
    if key_column is not None:
        self.columns.append(self.resolve_column(key_column))
    # One row budget per group, multiplied by the bins in each histogram.
    groups = len(
        selected_columns) if histogram_rows is None else histogram_rows
    self.limit = Limit(groups * num_buckets)
    self.orderby = (self.orderby if self.orderby else []) + [OrderBy(resolved_histogram, Direction.ASC)]
def get_conditions(self, query: QueryDict, params: Mapping[Any, Any]) -> List[Any]:
    """Translate request filters and params into a list of Snuba conditions."""
    conditions: List[Any] = [
        Condition(Column("timestamp"), Op.GTE, self.start),
        Condition(Column("timestamp"), Op.LT, self.end),
    ]
    # Dimension filters: only add a condition when the filter resolves to
    # at least one value.
    for name, dimension in DIMENSION_MAP.items():
        resolved = dimension.resolve_filter(query.getlist(name, []))
        if len(resolved) > 0:
            conditions.append(Condition(Column(name), Op.IN, resolved))
    if "project_id" in params:
        conditions.append(Condition(Column("project_id"), Op.IN, params["project_id"]))
    if "organization_id" in params:
        conditions.append(Condition(Column("org_id"), Op.EQ, params["organization_id"]))
    return conditions
def parse_condition(cond: Sequence[Any]) -> Condition:
    """Convert a legacy condition array into an SDK ``Condition``.

    :param cond: A legacy condition array.
    """
    # _parse_condition_parts yields (lhs, op, rhs) in Condition's argument order.
    return Condition(*_parse_condition_parts(cond))
def test_filter_keys(self):
    org_condition = Condition(Column("org_id"), Op.EQ, 1)

    # Org-only params: just the org condition.
    query = _make_query(
        "statsPeriod=6h&interval=10m&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1},
        True,
    )
    assert org_condition in query.conditions

    # Org + projects: both conditions present.
    query = _make_query(
        "statsPeriod=6h&interval=1d&groupBy=category&field=sum(times_seen)",
        {"organization_id": 1, "project_id": [1, 2, 3, 4, 5]},
        True,
    )
    assert org_condition in query.conditions
    assert Condition(Column("project_id"), Op.IN, [1, 2, 3, 4, 5]) in query.conditions
def test_cache(self):
    """Minimal test to verify if use_cache works"""
    query = Query(
        "events",
        Entity("events"),
        select=[Column("event_id")],
        where=[
            Condition(Column("project_id"), Op.EQ, self.project.id),
            Condition(Column("timestamp"), Op.GTE, timezone.now() - timedelta(days=1)),
            Condition(Column("timestamp"), Op.LT, timezone.now()),
        ],
        limit=Limit(1),
    )
    results = snuba.raw_snql_query(query, use_cache=True)
    assert results["data"] == []
def convert_search_filter_to_condition(
    self,
    search_filter: SearchFilter,
) -> Optional[WhereType]:
    """Convert a parsed ``SearchFilter`` into a Snuba condition.

    Returns ``None`` for fields that require no conversion, and raises
    ``NotImplementedError`` for fields not yet supported in snql parsing.
    """
    key_conversion_map: Mapping[str, Callable[[SearchFilter, str], WhereType]] = {
        "environment": self._environment_filter_converter,
    }
    name = search_filter.key.name
    value = search_filter.value.value

    # We want to use group_id elsewhere so shouldn't be removed from the dataset
    # but if a user has a tag with the same name we want to make sure that works
    if name == "group_id":
        name = f"tags[{name}]"

    if name in NO_CONVERSION_FIELDS:
        # Handled elsewhere; no condition to emit.
        return
    elif name in key_conversion_map:
        return key_conversion_map[name](search_filter, name)
    elif name in self.field_allowlist:
        lhs = self.column(name)

        # Handle checks for existence
        if search_filter.operator in ("=", "!=") and search_filter.value.value == "":
            if search_filter.key.is_tag:
                # Missing tags come back as empty strings, so compare directly.
                return Condition(lhs, Op(search_filter.operator), value)
            else:
                # If not a tag, we can just check that the column is null.
                # NOTE(review): ClickHouse's ifNull normally takes two
                # arguments (value, fallback); a single-argument call compared
                # to 1 looks suspect — confirm against the non-snql
                # implementation of this check.
                return Condition(Function("ifNull", [lhs]), Op(search_filter.operator), 1)

        if search_filter.value.is_wildcard():
            # Wildcard values are matched with a case-insensitive regex.
            condition = Condition(
                Function("match", [lhs, f"'(?i){value}'"]),
                Op(search_filter.operator),
                1,
            )
        else:
            condition = Condition(lhs, Op(search_filter.operator), value)

        return condition
    else:
        raise NotImplementedError(f"{name} not implemented in snql filter parsing yet")