def monitor_release_adoption(**kwargs):
    metrics.incr("sentry.tasks.monitor_release_adoption.start", sample_rate=1.0)
    # 1. Query snuba for all project ids that have sessions.
    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.aggregate_projects.loop", sample_rate=1.0
    ):
        aggregated_projects = defaultdict(list)
        start_time = time.time()
        offset = 0
        while (time.time() - start_time) < MAX_SECONDS:
            query = (
                Query(
                    dataset="sessions",
                    match=Entity("org_sessions"),
                    select=[
                        Column("org_id"),
                        Column("project_id"),
                    ],
                    groupby=[Column("org_id"), Column("project_id")],
                    where=[
                        Condition(
                            Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)
                        ),
                        Condition(Column("started"), Op.LT, datetime.utcnow()),
                    ],
                    granularity=Granularity(3600),
                    orderby=[
                        OrderBy(Column("org_id"), Direction.ASC),
                        OrderBy(Column("project_id"), Direction.ASC),
                    ],
                )
                .set_limit(CHUNK_SIZE + 1)
                .set_offset(offset)
            )

            data = snuba.raw_snql_query(query, referrer="tasks.monitor_release_adoption")["data"]
            count = len(data)
            more_results = count > CHUNK_SIZE
            offset += CHUNK_SIZE

            if more_results:
                data = data[:-1]

            for row in data:
                aggregated_projects[row["org_id"]].append(row["project_id"])

            if not more_results:
                break

        else:
            logger.info(
                "monitor_release_adoption.loop_timeout",
                extra={"offset": offset},
            )

    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.process_projects_with_sessions", sample_rate=1.0
    ):
        for org_id in aggregated_projects:
            process_projects_with_sessions.delay(org_id, aggregated_projects[org_id])
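
# A minimal standalone sketch (not from the source) of the CHUNK_SIZE + 1
# pagination idiom used in monitor_release_adoption above: request one extra
# row per page, use it only as a "has more results" sentinel, and drop it
# before processing so the next page re-reads it at the new offset.
# `fetch_page` and `iterate_all_rows` are hypothetical names standing in for
# snuba.raw_snql_query and the surrounding loop.
def iterate_all_rows(fetch_page, chunk_size=1000):
    """Yield every row from a paginated source, one page at a time."""
    offset = 0
    while True:
        rows = fetch_page(limit=chunk_size + 1, offset=offset)
        more_results = len(rows) > chunk_size
        if more_results:
            rows = rows[:-1]  # drop the sentinel row; the next page re-reads it
        yield from rows
        if not more_results:
            break
        offset += chunk_size


# Example with an in-memory list standing in for Snuba:
# rows = list(iterate_all_rows(lambda limit, offset: data[offset:offset + limit]))
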
def test_sub_query(self) -> None:
    inner_query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
    )

    query = (
        Query("discover", inner_query)
        .set_select([Function("avg", [Column("count")], "avg_count")])
        .set_orderby([OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)])
        .set_limit(1000)
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert data["data"] == [{"avg_count": 1.0}]
def test_generate_order_by_clause(self):
    for derived_metric_name in DERIVED_METRICS.keys():
        if derived_metric_name == self.crash_free_fake.metric_name:
            continue
        derived_metric_obj = DERIVED_METRICS[derived_metric_name]
        assert derived_metric_obj.generate_orderby_clause(
            projects=[self.project], direction=Direction.ASC
        ) == [
            OrderBy(
                derived_metric_obj.generate_select_statements([self.project])[0],
                Direction.ASC,
            )
        ]

    with pytest.raises(DerivedMetricParseException):
        self.crash_free_fake.generate_orderby_clause(
            projects=[self.project], direction=Direction.DESC
        )
def test_build_snuba_query_orderby(mock_now, mock_now2, mock_indexer):
    mock_indexer.resolve = MockIndexer().resolve
    query_params = MultiValueDict(
        {
            # weird release, but we need a string existing in the mock indexer
            "query": ["release:staging"],
            "groupBy": ["session.status", "environment"],
            "field": [
                "sum(sentry.sessions.session)",
            ],
            "orderBy": ["-sum(sentry.sessions.session)"],
            "limit": [3],
        }
    )
    query_definition = QueryDefinition(query_params)
    snuba_queries = SnubaQueryBuilder([PseudoProject(1, 1)], query_definition).get_snuba_queries()

    counter_queries = snuba_queries.pop("metrics_counters")
    assert not snuba_queries
    assert counter_queries["series"] is None  # No series because of orderBy

    assert counter_queries["totals"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[Function("sum", [Column("value")], "value")],
        groupby=[
            Column("metric_id"),
            Column("tags[8]"),
            Column("tags[2]"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("metric_id"), Op.IN, [9]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
        ],
        orderby=[OrderBy(Column("value"), Direction.DESC)],
        limit=Limit(3),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
def test_sessions_query(self) -> None:
    query = (
        Query("sessions", Entity("sessions"))
        .set_select([Column("project_id"), Column("release")])
        .set_groupby([Column("project_id"), Column("release")])
        .set_where(
            [
                Condition(Column("project_id"), Op.IN, [self.project_id]),
                Condition(Column("org_id"), Op.EQ, self.org_id),
                Condition(
                    Column("started"),
                    Op.GTE,
                    datetime(2021, 1, 1, 17, 5, 59, 554860),
                ),
                Condition(Column("started"), Op.LT, datetime(2022, 1, 1, 17, 6, 0, 554981)),
            ]
        )
        .set_orderby([OrderBy(Column("sessions"), Direction.DESC)])
        .set_limit(100)
    )

    response = self.post("/sessions/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200
    assert data["data"] == []
def test_tags_in_groupby(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ]
        )
        .set_groupby([Column("tags[k8s-app]")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("tags[k8s-app]"), Op.NEQ, ""),
                Condition(Column("type"), Op.NEQ, "transaction"),
            ]
        )
        .set_orderby(
            [
                OrderBy(
                    Function("max", [Column("timestamp")], "last_seen"),
                    Direction.DESC,
                )
            ]
        )
        .set_limit(1000)
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def test_simple_query(self) -> None:
    query = (
        Query("discover", Entity("discover_events"))
        .set_select([Function("count", [], "count")])
        .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
        .set_where(
            [
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
        .set_orderby([OrderBy(Function("count", [], "count"), Direction.ASC)])
        .set_limit(1000)
        .set_consistent(True)
        .set_debug(True)
    )

    response = self.post("/discover/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert data["stats"]["consistent"]
    assert data["data"] == [
        {
            "count": 1,
            "tags[custom_tag]": "custom_value",
            "project_id": self.project_id,
        }
    ]
def test_arrayjoin(self) -> None:
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ]
        )
        .set_groupby([Column("exception_frames.filename")])
        .set_array_join([Column("exception_frames.filename")])
        .set_where(
            [
                Condition(Column("exception_frames.filename"), Op.LIKE, "%.java"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
        .set_orderby(
            [
                OrderBy(
                    Function("max", [Column("timestamp")], "last_seen"),
                    Direction.DESC,
                )
            ]
        )
        .set_limit(1000)
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert len(data["data"]) == 6
def _get_snuba_query(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_id: int,
    columns: List[SelectableExpression],
    series: bool,
    limit_state: _LimitState,
    extra_conditions: List[Condition],
) -> Optional[Query]:
    """Build the snuba query.

    Return None if the results from the initial totals query were empty.
    """
    conditions = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("project_id"), Op.IN, query.filter_keys["project_id"]),
        Condition(Column("metric_id"), Op.EQ, metric_id),
        Condition(Column(TS_COL_QUERY), Op.GTE, query.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query.end),
    ]
    conditions += _get_filter_conditions(org_id, query.conditions)
    conditions += extra_conditions

    groupby = {}
    for field in query.raw_groupby:
        if field == "session.status":
            # This will be handled by conditional aggregates
            continue

        if field == "project":
            groupby["project"] = Column("project_id")
            continue

        try:
            groupby[field] = Column(resolve_tag_key(field))
        except MetricIndexNotFound:
            # exclude unresolved keys from groupby
            pass

    full_groupby = list(set(groupby.values()))

    if series:
        full_groupby.append(Column(TS_COL_GROUP))

    query_args = dict(
        dataset=Dataset.Metrics.value,
        match=Entity(entity_key.value),
        select=columns,
        groupby=full_groupby,
        where=conditions,
        granularity=Granularity(query.rollup),
    )

    # In case of group by, either set a limit or use the groups from the
    # first query to limit the results:
    if query.raw_groupby:
        if not limit_state.initialized:
            # Set limit and order by to be consistent with sessions_v2
            max_groups = SNUBA_LIMIT // len(get_timestamps(query))
            query_args["limit"] = Limit(max_groups)
            query_args["orderby"] = [OrderBy(columns[0], Direction.DESC)]
        else:
            if limit_state.limiting_conditions is None:
                # Initial query returned no results, no need to run any more queries
                return None
            query_args["where"] += limit_state.limiting_conditions
            query_args["limit"] = Limit(SNUBA_LIMIT)

    return Query(**query_args)
def sum_sessions_and_releases(org_id, project_ids):
    # Takes a single org id and a list of project ids.
    # Returns counts of releases and sessions across all environments and the
    # passed project_ids for the last 6 hours.
    start_time = time.time()
    offset = 0
    totals = defaultdict(dict)
    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.process_projects_with_sessions.loop"
    ):
        while (time.time() - start_time) < MAX_SECONDS:
            with metrics.timer(
                "sentry.tasks.monitor_release_adoption.process_projects_with_sessions.query"
            ):
                query = (
                    Query(
                        dataset="sessions",
                        match=Entity("sessions"),
                        select=[
                            Column("sessions"),
                        ],
                        groupby=[
                            Column("org_id"),
                            Column("project_id"),
                            Column("release"),
                            Column("environment"),
                        ],
                        where=[
                            Condition(
                                Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)
                            ),
                            Condition(Column("started"), Op.LT, datetime.utcnow()),
                            Condition(Column("org_id"), Op.EQ, org_id),
                            Condition(Column("project_id"), Op.IN, project_ids),
                        ],
                        granularity=Granularity(21600),
                        orderby=[
                            OrderBy(Column("org_id"), Direction.ASC),
                            OrderBy(Column("project_id"), Direction.ASC),
                        ],
                    )
                    .set_limit(CHUNK_SIZE + 1)
                    .set_offset(offset)
                )

                data = snuba.raw_snql_query(
                    query, referrer="tasks.process_projects_with_sessions.session_count"
                )["data"]
                count = len(data)
                more_results = count > CHUNK_SIZE
                offset += CHUNK_SIZE

                if more_results:
                    data = data[:-1]

                for row in data:
                    row_totals = totals[row["project_id"]].setdefault(
                        row["environment"], {"total_sessions": 0, "releases": defaultdict(int)}
                    )
                    row_totals["total_sessions"] += row["sessions"]
                    row_totals["releases"][row["release"]] += row["sessions"]

            if not more_results:
                break

        else:
            logger.info(
                "process_projects_with_sessions.loop_timeout",
                extra={"org_id": org_id, "project_ids": project_ids},
            )

    return totals
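
# Hypothetical illustration (project ids, environments, release names, and
# counts are all invented) of the nested mapping sum_sessions_and_releases
# builds above: project_id -> environment -> total session count plus
# per-release session counts.
example_totals = {
    11: {
        "production": {
            "total_sessions": 42,
            "releases": {"backend@1.0.0": 30, "backend@1.0.1": 12},
        },
        "staging": {
            "total_sessions": 5,
            "releases": {"backend@1.0.1": 5},
        },
    },
}
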
def test_suspect_spans_lambdas(self) -> None:
    query = (
        Query("discover", Entity("discover_transactions"))
        .set_select(
            [
                Column("spans.op"),
                Column("spans.group"),
                Function(
                    "arrayReduce",
                    [
                        "sumIf",
                        Column("spans.exclusive_time_32"),
                        Function(
                            "arrayMap",
                            [
                                Lambda(
                                    ["x", "y"],
                                    Function(
                                        "if",
                                        [
                                            Function(
                                                "equals",
                                                [
                                                    Function(
                                                        "and",
                                                        [
                                                            Function(
                                                                "equals",
                                                                [Identifier("x"), "db"],
                                                            ),
                                                            Function(
                                                                "equals",
                                                                [
                                                                    Identifier("y"),
                                                                    "05029609156d8133",
                                                                ],
                                                            ),
                                                        ],
                                                    ),
                                                    1,
                                                ],
                                            ),
                                            1,
                                            0,
                                        ],
                                    ),
                                ),
                                Column("spans.op"),
                                Column("spans.group"),
                            ],
                        ),
                    ],
                    "array_spans_exclusive_time",
                ),
            ]
        )
        .set_where(
            [
                Condition(Column("transaction_name"), Op.EQ, "/api/do_things"),
                Condition(Function("has", [Column("spans.op"), "db"]), Op.EQ, 1),
                Condition(
                    Function("has", [Column("spans.group"), "05029609156d8133"]),
                    Op.EQ,
                    1,
                ),
                Condition(Column("duration"), Op.LT, 900000.0),
                Condition(Column("finish_ts"), Op.GTE, self.base_time),
                Condition(Column("finish_ts"), Op.LT, self.next_time),
                Condition(Column("project_id"), Op.IN, (self.project_id,)),
            ]
        )
        .set_orderby([OrderBy(Column("array_spans_exclusive_time"), Direction.DESC)])
        .set_limit(10)
    )

    response = self.post("/discover/snql", data=query.snuba())
    resp = json.loads(response.data)
    assert response.status_code == 200, resp
    data = resp["data"]
    assert len(data) == 1
    assert data[0]["array_spans_exclusive_time"] > 0
        Query(
            dataset="discover",
            match=Entity("events", "ev", 0.2),
            select=[
                Column("title"),
                Column("tags[release:1]"),
                Function("uniq", [Column("event_id")], "uniq_events"),
            ],
            groupby=[Column("title"), Column("tags[release:1]")],
            where=[
                Condition(Column("timestamp"), Op.GT, NOW),
                Condition(Function("toHour", [Column("timestamp")]), Op.LTE, NOW),
                Condition(Column("project_id"), Op.IN, Function("tuple", [1, 2, 3])),
            ],
            having=[Condition(Function("uniq", [Column("event_id")]), Op.GT, 1)],
            orderby=[OrderBy(Column("title"), Direction.ASC)],
            limitby=LimitBy([Column("title"), Column("event_id")], 5),
            limit=Limit(10),
            offset=Offset(1),
            granularity=Granularity(3600),
            debug=Debug(True),
        ),
        id="complex query",
    ),
    pytest.param(
        Query("discover", Entity("events", None, 0.2))
        .set_select([Column("event_id")])
        .set_where([Condition(Column("timestamp"), Op.GT, NOW)])
        .set_limit(10)
        .set_offset(1)
        .set_granularity(3600),
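
# A small sketch (not one of the parametrized cases above), assuming a recent
# snuba-sdk where these names are top-level exports: the keyword-constructor
# style and the fluent .set_*() style produce equal Query objects, which is
# what assertions like `counter_queries["totals"] == Query(...)` rely on.
from snuba_sdk import Column, Entity, Query

constructed = Query(dataset="discover", match=Entity("events"), select=[Column("event_id")])
fluent = Query("discover", Entity("events")).set_select([Column("event_id")])
assert constructed == fluent
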
def test_build_snuba_query_orderby(mock_now, mock_now2, monkeypatch):
    monkeypatch.setattr("sentry.sentry_metrics.indexer.resolve", MockIndexer().resolve)
    query_params = MultiValueDict(
        {
            # weird release, but we need a string existing in the mock indexer
            "query": ["release:staging"],
            "groupBy": ["session.status", "environment"],
            "field": [
                "sum(sentry.sessions.session)",
            ],
            "orderBy": ["-sum(sentry.sessions.session)"],
        }
    )
    query_definition = QueryDefinition(query_params, paginator_kwargs={"limit": 3})
    snuba_queries, _ = SnubaQueryBuilder(
        [PseudoProject(1, 1)], query_definition
    ).get_snuba_queries()

    counter_queries = snuba_queries.pop("metrics_counters")
    assert not snuba_queries

    op = "sum"
    metric_name = "sentry.sessions.session"
    select = Function(
        OP_TO_SNUBA_FUNCTION["metrics_counters"]["sum"],
        [
            Column("value"),
            Function("equals", [Column("metric_id"), resolve_weak(metric_name)]),
        ],
        alias=f"{op}({metric_name})",
    )

    assert counter_queries["totals"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[select],
        groupby=[
            Column("tags[8]"),
            Column("tags[2]"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
            Condition(Column("metric_id"), Op.IN, [9]),
        ],
        orderby=[OrderBy(select, Direction.DESC)],
        limit=Limit(3),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
    assert counter_queries["series"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[select],
        groupby=[
            Column("tags[8]"),
            Column("tags[2]"),
            Column("bucketed_time"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
            Condition(Column("metric_id"), Op.IN, [9]),
        ],
        orderby=[OrderBy(select, Direction.DESC)],
        limit=Limit(6480),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )