def __init__(
    self,
    dataset: Dataset,
    params: ParamsType,
    granularity: int,
    query: Optional[str] = None,
    selected_columns: Optional[List[str]] = None,
    equations: Optional[List[str]] = None,
    functions_acl: Optional[List[str]] = None,
    limit: Optional[int] = 10000,
):
    super().__init__(
        dataset,
        params,
        auto_fields=False,
        functions_acl=functions_acl,
        equation_config={"auto_add": True, "aggregates_only": True},
    )

    self.where, self.having = self.resolve_conditions(query, use_aggregate_conditions=False)
    self.limit = None if limit is None else Limit(limit)

    # params depends on parse_query, and conditions being resolved first since there may be projects in conditions
    self.where += self.resolve_params()
    self.columns = self.resolve_select(selected_columns, equations)
    self.granularity = Granularity(granularity)

    # This is a timeseries, the groupby will always be time
    self.groupby = [self.time_column]
def test_simple(self) -> None:
    query = Query(
        dataset="sessions",
        match=Entity("org_sessions"),
        select=[Column("org_id"), Column("project_id")],
        groupby=[Column("org_id"), Column("project_id")],
        where=[
            Condition(Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)),
            Condition(Column("started"), Op.LT, datetime.utcnow()),
        ],
        granularity=Granularity(3600),
    )

    response = self.app.post(
        "/sessions/snql",
        data=query.snuba(),
    )
    data = json.loads(response.data)

    assert response.status_code == 200, response.data
    assert len(data["data"]) == 2
    assert data["data"][0]["org_id"] == self.org_id
    assert data["data"][0]["project_id"] == self.project_id
    assert data["data"][1]["org_id"] == self.org_id
    assert data["data"][1]["project_id"] == self.project_id2
def build_project_usage_outcomes(start__stop, project):
    start, stop = start__stop

    # XXX(epurkhiser): Tsdb used to use day buckets, where the end would
    # represent a whole day. Snuba queries more accurately, thus we must
    # capture the entire last day
    end = stop + timedelta(days=1)

    query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("outcome"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(
                Column("outcome"),
                Op.IN,
                [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED],
            ),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("outcome"), Column("category")],
        granularity=Granularity(ONE_DAY),
    )
    data = raw_snql_query(query, referrer="reports.outcomes")["data"]

    return (
        # Accepted errors
        sum(
            row["total"]
            for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.ACCEPTED
        ),
        # Dropped errors
        sum(
            row["total"]
            for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.RATE_LIMITED
        ),
        # Accepted transactions
        sum(
            row["total"]
            for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.ACCEPTED
        ),
        # Dropped transactions
        sum(
            row["total"]
            for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.RATE_LIMITED
        ),
    )
def run_outcomes_query_timeseries(query: QueryDefinition) -> ResultSet:
    snql_query = Query(
        dataset=query.dataset.value,
        match=Entity(query.match),
        select=query.select_params,
        groupby=query.group_by + [Column(TS_COL)],
        where=query.conditions,
        limit=Limit(10000),
        offset=Offset(0),
        granularity=Granularity(query.rollup),
    )
    result_timeseries = raw_snql_query(snql_query, referrer="outcomes.timeseries")
    return _format_rows(result_timeseries["data"], query)
def test_orderby(self) -> None:
    self.project_id3 = next(self.id_iter)
    self.org_id2 = next(self.id_iter)
    self.generate_session_events(self.org_id2, self.project_id3)

    query = Query(
        dataset="sessions",
        match=Entity("org_sessions"),
        select=[Column("org_id"), Column("project_id")],
        groupby=[Column("org_id"), Column("project_id")],
        where=[
            Condition(Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)),
            Condition(Column("started"), Op.LT, datetime.utcnow()),
        ],
        granularity=Granularity(3600),
        orderby=[OrderBy(Column("org_id"), Direction.ASC)],
    )

    response = self.app.post(
        "/sessions/snql",
        data=query.snuba(),
    )
    data = json.loads(response.data)

    assert response.status_code == 200, response.data
    assert len(data["data"]) == 3
    assert data["data"][0]["org_id"] == self.org_id
    assert data["data"][0]["project_id"] == self.project_id
    assert data["data"][1]["org_id"] == self.org_id
    assert data["data"][1]["project_id"] == self.project_id2
    assert data["data"][2]["org_id"] == self.org_id2
    assert data["data"][2]["project_id"] == self.project_id3

    query = query.set_orderby([OrderBy(Column("org_id"), Direction.DESC)])

    response = self.app.post(
        "/sessions/snql",
        data=query.snuba(),
    )
    data = json.loads(response.data)

    assert response.status_code == 200, response.data
    assert len(data["data"]) == 3
    assert data["data"][0]["org_id"] == self.org_id2
    assert data["data"][0]["project_id"] == self.project_id3
    assert data["data"][1]["org_id"] == self.org_id
    assert data["data"][1]["project_id"] == self.project_id
    assert data["data"][2]["org_id"] == self.org_id
    assert data["data"][2]["project_id"] == self.project_id2
def _get_snuba_query(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_id: int,
    columns: Sequence[str],
    series: bool,
    extra_conditions: List[Condition],
    remove_groupby: Set[Column],
) -> Query:
    """Build the snuba query"""
    conditions = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("project_id"), Op.IN, query.filter_keys["project_id"]),
        Condition(Column("metric_id"), Op.EQ, metric_id),
        Condition(Column(TS_COL_QUERY), Op.GTE, query.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query.end),
    ]
    conditions += _get_filter_conditions(org_id, query.conditions)
    conditions += extra_conditions

    groupby_tags = [field for field in query.raw_groupby if field != "project"]
    tag_keys = {field: _resolve(field) for field in groupby_tags}
    groupby = {
        field: Column(f"tags[{tag_id}]")
        for field, tag_id in tag_keys.items()
        if tag_id is not None  # exclude unresolved keys from groupby
    }

    if "project" in query.raw_groupby:
        groupby["project"] = Column("project_id")

    full_groupby = set(groupby.values()) - remove_groupby
    if series:
        full_groupby.add(Column(TS_COL_GROUP))

    return Query(
        dataset=Dataset.Metrics.value,
        match=Entity(entity_key.value),
        select=[Column(column) for column in columns],
        groupby=list(full_groupby),
        where=conditions,
        granularity=Granularity(query.rollup),
    )
def _get_crash_free_rate_data(
    org_id: int,
    project_ids: Sequence[int],
    start: datetime,
    end: datetime,
    rollup: int,
) -> Dict[int, Dict[str, float]]:
    data: Dict[int, Dict[str, float]] = {}
    session_status = tag_key(org_id, "session.status")

    count_query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity(EntityKey.MetricsCounters.value),
        select=[Column("value")],
        where=[
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id, "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
        ],
        groupby=[
            Column("project_id"),
            Column(session_status),
        ],
        granularity=Granularity(rollup),
    )

    count_data = raw_snql_query(
        count_query,
        referrer="release_health.metrics.get_crash_free_data",
        use_cache=False,
    )["data"]

    for row in count_data:
        project_data = data.setdefault(row["project_id"], {})
        tag_value = reverse_tag_value(org_id, row[session_status])
        project_data[tag_value] = row["value"]

    return data
def get_oldest_health_data_for_releases(
    self,
    project_releases: Sequence[ProjectRelease],
) -> Mapping[ProjectRelease, str]:
    now = datetime.now(pytz.utc)
    start = now - timedelta(days=90)

    project_ids: List[ProjectId] = [x[0] for x in project_releases]
    org_id = self._get_org_id(project_ids)
    release_column_name = tag_key(org_id, "release")
    releases = [x[1] for x in project_releases]
    releases_ids = [
        release_id
        for release_id in [try_get_string_index(org_id, release) for release in releases]
        if release_id is not None
    ]

    query_cols = [
        Column("project_id"),
        Column(release_column_name),
        Function("min", [Column("bucketed_time")], "oldest"),
    ]
    group_by = [
        Column("project_id"),
        Column(release_column_name),
    ]
    where_clause = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("metric_id"), Op.EQ, metric_id(org_id, "session")),
        Condition(Column("timestamp"), Op.GTE, start),
        Condition(Column("timestamp"), Op.LT, now),
        Condition(Column(release_column_name), Op.IN, releases_ids),
    ]

    query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity("metrics_counters"),
        select=query_cols,
        where=where_clause,
        groupby=group_by,
        granularity=Granularity(3600),
    )
    rows = raw_snql_query(
        query,
        referrer="release_health.metrics.get_oldest_health_data_for_releases",
        use_cache=False,
    )["data"]

    result = {}
    for row in rows:
        result[
            row["project_id"], reverse_tag_value(org_id, row[release_column_name])
        ] = row["oldest"]

    return result
groupby=[Column("title")], where=[ Condition(Column("timestamp"), Op.GT, NOW), Condition(Function("toHour", [Column("timestamp")]), Op.LTE, NOW), Condition(Column("project_id"), Op.IN, Function("tuple", [1, 2, 3])), ], having=[ Condition(Function("uniq", [Column("event_id")]), Op.GT, 1) ], orderby=[OrderBy(Column("title"), Direction.ASC)], limitby=LimitBy(Column("title"), 5), limit=Limit(10), offset=Offset(1), granularity=Granularity(3600), totals=Totals(True), ), ( "MATCH (events SAMPLE 1000)", "SELECT title, uniq(event_id) AS uniq_events, quantile(0.5)(duration) AS p50", "BY title", ("WHERE timestamp > toDateTime('2021-01-02T03:04:05.000006') " "AND toHour(timestamp) <= toDateTime('2021-01-02T03:04:05.000006') " "AND project_id IN tuple(1, 2, 3)"), "HAVING uniq(event_id) > 1", "ORDER BY title ASC", "LIMIT 5 BY title", "LIMIT 10", "OFFSET 1", "GRANULARITY 3600",
def build_project_series(start__stop, project):
    start, stop = start__stop
    rollup = ONE_DAY

    resolution, series = tsdb.get_optimal_rollup_series(start, stop, rollup)
    assert resolution == rollup, "resolution does not match requested value"

    clean = partial(clean_series, start, stop, rollup)

    def zerofill_clean(data):
        return clean(zerofill(data, start, stop, rollup, fill_default=0))

    # Note: this section can be removed
    issue_ids = project.group_set.filter(
        status=GroupStatus.RESOLVED,
        resolved_at__gte=start,
        resolved_at__lt=stop,
    ).values_list("id", flat=True)

    # TODO: The TSDB calls could be replaced with a SnQL call here
    tsdb_range_resolved = _query_tsdb_groups_chunked(tsdb.get_range, issue_ids, start, stop, rollup)
    resolved_error_series = reduce(
        merge_series,
        map(clean, tsdb_range_resolved.values()),
        clean([(timestamp, 0) for timestamp in series]),
    )
    # end

    # Use outcomes to compute total errors and transactions
    outcomes_query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("time"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(Column("outcome"), Op.EQ, Outcome.ACCEPTED),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("time"), Column("category")],
        granularity=Granularity(rollup),
        orderby=[OrderBy(Column("time"), Direction.ASC)],
    )
    outcome_series = raw_snql_query(outcomes_query, referrer="reports.outcome_series")

    total_error_series = OrderedDict()
    for v in outcome_series["data"]:
        if v["category"] in DataCategory.error_categories():
            timestamp = int(to_timestamp(parse_snuba_datetime(v["time"])))
            total_error_series[timestamp] = total_error_series.get(timestamp, 0) + v["total"]
    total_error_series = zerofill_clean(list(total_error_series.items()))

    transaction_series = [
        (int(to_timestamp(parse_snuba_datetime(v["time"]))), v["total"])
        for v in outcome_series["data"]
        if v["category"] == DataCategory.TRANSACTION
    ]
    transaction_series = zerofill_clean(transaction_series)

    error_series = merge_series(
        resolved_error_series,
        total_error_series,
        lambda resolved, total: (resolved, total - resolved),  # Resolved, Unresolved
    )

    # Format of this series: [(resolved, unresolved, transactions)]
    return merge_series(
        error_series,
        transaction_series,
        lambda errors, transactions: errors + (transactions,),
    )
def test_granularity(value: Any, exception: Optional[Exception]) -> None:
    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            Granularity(value)
    else:
        assert Granularity(value).granularity == value
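# Hedged sketch of the behaviour the test above exercises, assuming the snuba_sdk
# Granularity seen in these snippets: a positive integer is stored as-is, while an
# invalid value is rejected when the expression validates itself on construction.
# The exact exception type and message are not asserted here, since they are not
# shown in this excerpt.
from snuba_sdk import Granularity

assert Granularity(3600).granularity == 3600

try:
    Granularity(0)  # a non-positive rollup is expected to be rejected
except Exception as exc:
    print(f"rejected as expected: {exc!r}")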
def set_granularity(self, granularity: int) -> "Query":
    return self._replace("granularity", Granularity(granularity))
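# A minimal usage sketch of the builder method above. Because set_granularity goes
# through _replace, it returns a new Query rather than mutating in place, so a
# different rollup can be derived from the same base query. The dataset and entity
# names below are illustrative assumptions, not taken from the snippets above.
from snuba_sdk import Column, Entity, Granularity, Query

base_query = Query(
    dataset="events",        # hypothetical dataset
    match=Entity("events"),  # hypothetical entity
    select=[Column("title")],
    granularity=Granularity(60),
)

hourly_query = base_query.set_granularity(3600)

assert base_query.granularity == Granularity(60)      # original query is unchanged
assert hourly_query.granularity == Granularity(3600)  # the copy carries the new rollup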