def _build_where(
    self, query_definition: QueryDefinition
) -> List[Union[BooleanCondition, Condition]]:
    """Assemble the base WHERE clause: org, projects, time window, plus parsed tag filters."""
    assert self._projects
    organization_id = self._projects[0].organization_id
    conditions: List[Union[BooleanCondition, Condition]] = [
        Condition(Column("org_id"), Op.EQ, organization_id),
        Condition(
            Column("project_id"), Op.IN, [project.id for project in self._projects]
        ),
        Condition(Column(TS_COL_QUERY), Op.GTE, query_definition.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query_definition.end),
    ]
    tag_conditions = resolve_tags(query_definition.parsed_query)
    if tag_conditions:
        conditions.extend(tag_conditions)
    return conditions
def _resolve_failure_count(
    self,
    _: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: Optional[str] = None,
) -> SelectType:
    """Count transaction.duration entries whose status is not a non-failure status."""
    resolved_statuses = [
        indexer.resolve(status) for status in constants.NON_FAILURE_STATUS
    ]
    metric_match = Function(
        "equals",
        [Column("metric_id"), self.resolve_metric("transaction.duration")],
    )
    # Unresolved statuses are dropped from the notIn list.
    status_is_failure = Function(
        "notIn",
        [
            self.builder.column("transaction.status"),
            [status for status in resolved_statuses if status is not None],
        ],
    )
    return self._resolve_count_if(metric_match, status_is_failure, alias)
def _build_totals_and_series_queries(
    entity, select, where, groupby, orderby, limit, offset, rollup, intervals_len
):
    """Build the paired "totals" and "series" snuba queries for a single entity.

    The series query is derived from the totals query by adding the timestamp
    bucket column to the groupby and scaling the limit by the interval count.
    """
    totals_query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity(entity),
        groupby=groupby,
        select=select,
        where=where,
        limit=Limit(limit or MAX_POINTS),
        offset=Offset(offset or 0),
        granularity=Granularity(rollup),
        orderby=orderby,
    )
    series_groupby = (totals_query.groupby or []) + [Column(TS_COL_GROUP)]
    series_query = totals_query.set_groupby(series_groupby)

    # In a series query, we also need to factor in the len of the intervals array
    series_query = series_query.set_limit(
        limit * intervals_len if limit else MAX_POINTS
    )

    return {"totals": totals_query, "series": series_query}
def resolve_tags(input_: Any) -> Any:
    """Translate tags in snuba condition

    This assumes that all strings are either tag names or tag values, so do not
    pass Column("metric_id") or Column("project_id") into this function.
    """
    if isinstance(input_, list):
        return [resolve_tags(item) for item in input_]

    if isinstance(input_, Function):
        if input_.function == "ifNull":
            # This was wrapped automatically by QueryBuilder, remove wrapper
            return resolve_tags(input_.parameters[0])
        # Short-circuit preserves falsy parameters (None / empty list) unchanged.
        resolved_parameters = input_.parameters and [
            resolve_tags(item) for item in input_.parameters
        ]
        return Function(function=input_.function, parameters=resolved_parameters)

    if isinstance(input_, Condition):
        return Condition(
            lhs=resolve_tags(input_.lhs), op=input_.op, rhs=resolve_tags(input_.rhs)
        )

    if isinstance(input_, BooleanCondition):
        return input_.__class__(
            conditions=[resolve_tags(item) for item in input_.conditions]
        )

    if isinstance(input_, Column):
        # HACK: Some tags already take the form "tags[...]" in discover, take that into account:
        name = input_.key if input_.subscriptable == "tags" else input_.name
        return Column(name=resolve_tag_key(name))

    if isinstance(input_, str):
        return resolve_weak(input_)

    return input_
def _translate_conditions(org_id: int, input_: Any) -> Any:
    """Recursively resolve tag keys and values in a condition tree to indexer IDs."""
    if isinstance(input_, Column):
        # The only filterable tag keys are release and environment.
        assert input_.name in ("release", "environment")
        # It greatly simplifies code if we just assume that they exist.
        # Alternative would be:
        # * if tag key or value does not exist in AND-clause, return no data
        # * if tag key or value does not exist in OR-clause, remove condition
        return Column(resolve_tag_key(input_.name))

    if isinstance(input_, str):
        # Assuming this is the right-hand side, we need to fetch a tag value.
        # It's OK if the tag value resolves to None, the snuba query will then
        # return no results, as is intended behavior
        return indexer.resolve(input_)

    if isinstance(input_, Function):
        return Function(
            function=input_.function,
            parameters=_translate_conditions(org_id, input_.parameters),
        )

    if isinstance(input_, Condition):
        return Condition(
            lhs=_translate_conditions(org_id, input_.lhs),
            op=input_.op,
            rhs=_translate_conditions(org_id, input_.rhs),
        )

    if isinstance(input_, (int, float)):
        return input_

    assert isinstance(input_, (tuple, list)), input_
    return [_translate_conditions(org_id, item) for item in input_]
def test_set_sum_aggregation_for_errored_sessions(self):
    """sessions_errored_set builds a uniqIf over values of the given metric IDs."""
    alias = "whatever"
    expected = Function(
        "uniqIf",
        [
            Column("value"),
            Function("in", [Column("metric_id"), list(self.metric_ids)]),
        ],
        alias,
    )
    assert sessions_errored_set(self.metric_ids, alias) == expected
def _to_column(query_func: SessionsQueryFunction) -> SelectableExpression:
    """
    Converts query a function into an expression that can be directly plugged into anywhere
    columns are used (like the select argument of a Query)
    """
    # distribution columns: all five percentiles come from one quantiles() call
    if query_func in (
        "p50(session.duration)",
        "p75(session.duration)",
        "p90(session.duration)",
        "p95(session.duration)",
        "p99(session.duration)",
    ):
        return Function(
            alias="percentiles",
            function="quantiles(0.5,0.75,0.9,0.95,0.99)",
            parameters=[Column("value")],
        )

    # remaining distribution / counter / set aggregates share one shape
    simple_aggregates = {
        "avg(session.duration)": ("avg", "avg"),
        "max(session.duration)": ("max", "max"),
        "sum(session)": ("sum", "sum"),
        "count_unique(user)": ("count_unique", "uniq"),
    }
    if query_func in simple_aggregates:
        alias, function = simple_aggregates[query_func]
        return Function(alias=alias, function=function, parameters=[Column("value")])

    raise ValueError("Unmapped metrics column", query_func)
def test_attribution_tags(self) -> None:
    """A SnQL query with team/feature set should record one attribution metric call."""
    conditions = [
        Condition(Column("project_id"), Op.EQ, self.project_id),
        Condition(Column("timestamp"), Op.GTE, self.base_time),
        Condition(Column("timestamp"), Op.LT, self.next_time),
    ]
    query = (
        Query("events", Entity("events"))
        .set_select([Function("count", [], "count")])
        .set_where(conditions)
        .set_team("sns")
        .set_feature("test")
    )
    response = self.post("/events/snql", data=query.snuba())
    resp = json.loads(response.data)
    assert response.status_code == 200, resp

    metric_calls = get_recorded_metric_calls("increment", "snuba.attribution.log")
    assert metric_calls is not None
    assert len(metric_calls) == 1
    assert metric_calls[0].value > 0
    assert metric_calls[0].tags["app_id"] == "default"
def expected_query(match, select, extra_groupby):
    """Build the Query object the test expects to be sent to snuba."""
    base_groupby = [Column("metric_id"), Column("tags[8]"), Column("tags[2]")]
    conditions = [
        Condition(Column("org_id"), Op.EQ, 1),
        Condition(Column("project_id"), Op.EQ, 1),
        Condition(Column("metric_id"), Op.IN, [9, 11, 7]),
        Condition(
            Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)
        ),
        Condition(
            Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)
        ),
        Condition(Column("tags[6]"), Op.EQ, 10),
    ]
    return Query(
        dataset="metrics",
        match=Entity(match),
        select=[Column(select)],
        groupby=base_groupby + extra_groupby,
        where=conditions,
        limit=Limit(MAX_POINTS),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
def _build_orderby(
    self, query_definition: QueryDefinition, entity: str
) -> Optional[List[OrderBy]]:
    """Map the query's (op, metric) orderby onto a snuba OrderBy; None when unsorted."""
    orderby = query_definition.orderby
    if orderby is None:
        return None

    (op, metric_name), direction = orderby
    field = _OP_TO_FIELD[entity][op]
    return [OrderBy(Column(field.snuba_alias), direction)]
def _resolve_web_vital_function(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: str,
) -> SelectType:
    """count_web_vitals: count metric values whose measurement rating matches a quality."""
    column = args["column"]
    metric_id = args["metric_id"]
    quality = args["quality"].lower()

    supported_measurements = (
        "measurements.lcp",
        "measurements.fcp",
        "measurements.fp",
        "measurements.fid",
        "measurements.cls",
    )
    if column not in supported_measurements:
        raise InvalidSearchQuery("count_web_vitals only supports measurements")

    measurement_rating = self.builder.resolve_column("measurement_rating")
    quality_id = indexer.resolve(quality)
    if quality_id is None:
        # This matches the type from doing `select toTypeName(count()) ...` from clickhouse
        return Function("toUInt64", [0], alias)

    rating_matches = Function("equals", [measurement_rating, quality_id])
    metric_matches = Function("equals", [Column("metric_id"), metric_id])
    return Function(
        "countIf",
        [Column("value"), Function("and", [rating_matches, metric_matches])],
        alias,
    )
def get_single_metric_info(projects: Sequence[Project], metric_name: str) -> MetricMetaWithTagKeys:
    """Look up metadata (type, available operations, tag keys) for one raw metric.

    Probes each metric-type entity in turn and returns metadata from the first
    entity that has data for the resolved metric ID.

    Raises:
        InvalidParams: if the name does not resolve or no entity has data for it.
    """
    assert projects

    metric_id = indexer.resolve(metric_name)
    if metric_id is None:
        raise InvalidParams

    for metric_type in ("counter", "set", "distribution"):
        # TODO: What if metric_id exists for multiple types / units?
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column("tags.key")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id"), Column("tags.key")],
            referrer="snuba.metrics.meta.get_single_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            tag_ids = {tag_id for row in data for tag_id in row["tags.key"]}
            return {
                "name": metric_name,
                "type": metric_type,
                "operations": AVAILABLE_OPERATIONS[entity_key.value],
                "tags": sorted(
                    ({"key": reverse_resolve(tag_id)} for tag_id in tag_ids),
                    key=itemgetter("key"),
                ),
                "unit": None,
            }

    # Fix: message previously read "does not exit".
    raise InvalidParams(f"Raw metric {metric_name} does not exist")
def _build_where(
    self, query_definition: QueryDefinition
) -> List[Union[BooleanCondition, Condition]]:
    """Base WHERE clause: org, project, requested metric IDs, and the time window."""
    requested_metric_ids = [
        indexer.resolve(name) for _, name in query_definition.fields.values()
    ]
    conditions: List[Union[BooleanCondition, Condition]] = [
        Condition(Column("org_id"), Op.EQ, self._project.organization_id),
        Condition(Column("project_id"), Op.EQ, self._project.id),
        Condition(Column("metric_id"), Op.IN, requested_metric_ids),
        Condition(Column(TS_COL_QUERY), Op.GTE, query_definition.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query_definition.end),
    ]
    extra_filter = self._build_filter(query_definition)
    if extra_filter:
        conditions.append(extra_filter)
    return conditions
def _resolve_count_miserable_function(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: Optional[str] = None,
) -> SelectType:
    """Count unique users flagged as miserable for the given metric."""
    metric_true = indexer.resolve(constants.METRIC_TRUE_TAG_VALUE)
    if metric_true is None:
        # Nobody is miserable, we can return 0
        return Function("toUInt64", [0], alias)

    metric_matches = Function("equals", [Column("metric_id"), args["metric_id"]])
    is_miserable = Function(
        "equals",
        [self.builder.column(constants.METRIC_MISERABLE_TAG_KEY), metric_true],
    )
    return Function(
        "uniqIf",
        [Column("value"), Function("and", [metric_matches, is_miserable])],
        alias,
    )
def get_tag_values(
    self, tag_name: str, metric_names: Optional[Sequence[str]]
) -> Sequence[TagValue]:
    """Get all known values for a specific tag.

    When metric_names is given, only values occurring in *all* of those metrics
    are returned.

    Raises:
        InvalidParams: if the tag name cannot be resolved.
    """
    tag_id = indexer.resolve(tag_name)
    if tag_id is None:
        raise InvalidParams

    where = self._get_metrics_filter(metric_names)
    if where is None:
        return []

    # metric_id -> list of tag-value IDs seen for that metric
    value_ids_by_metric = defaultdict(list)
    column_name = f"tags[{tag_id}]"
    for metric_type in ("counter", "set", "distribution"):
        # TODO: What if metric_id exists for multiple types / units?
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = self._get_data(
            entity_key=entity_key,
            select=[Column("metric_id"), Column(column_name)],
            where=where,
            groupby=[Column("metric_id"), Column(column_name)],
            referrer="snuba.metrics.meta.get_tag_values",
        )
        for row in rows:
            value_id = row[column_name]
            if value_id > 0:
                value_ids_by_metric[row["metric_id"]].append(value_id)

    value_id_lists = value_ids_by_metric.values()
    if metric_names is not None:
        # Only return tags that occur in all metrics.
        # Fix: set.intersection(*[]) raises TypeError when no rows came back;
        # guard the empty case explicitly.
        id_sets = [set(ids) for ids in value_id_lists]
        value_ids = set.intersection(*id_sets) if id_sets else set()
    else:
        value_ids = {value_id for ids in value_id_lists for value_id in ids}

    tags = [
        {"key": tag_name, "value": reverse_resolve(value_id)}
        for value_id in value_ids
    ]
    tags.sort(key=itemgetter("key"))
    return tags
def _count_users(total: bool, referrer: str) -> Dict[Any, int]:
    """Count unique users from the metrics sets entity, grouped by the common groupby."""
    where = _get_common_where(total) + [
        Condition(Column("metric_id"), Op.EQ, metric_id(org_id, "user")),
    ]
    query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity(EntityKey.MetricsSets.value),
        select=[Column("value")],
        where=where,
        groupby=_get_common_groupby(total),
    )
    rows = raw_snql_query(query, referrer=referrer, use_cache=False)["data"]
    return _convert_results(rows, total=total)
def _resolve_percentile(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: str,
    fixed_percentile: float,
) -> SelectType:
    """Compute a single fixed percentile over values of the given metric."""
    # quantilesIf(...) returns an array; arrayElement(..., 1) unwraps the
    # single requested quantile.
    quantile = Function(
        f"quantilesIf({fixed_percentile})",
        [
            Column("value"),
            Function("equals", [Column("metric_id"), args["metric_id"]]),
        ],
    )
    return Function("arrayElement", [quantile, 1], alias)
def _get_snuba_query(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_id: int,
    columns: Sequence[str],
    series: bool,
    extra_conditions: List[Condition],
    remove_groupby: Set[Column],
) -> Query:
    """Build the snuba query"""
    conditions = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("project_id"), Op.IN, query.filter_keys["project_id"]),
        Condition(Column("metric_id"), Op.EQ, metric_id),
        Condition(Column(TS_COL_QUERY), Op.GTE, query.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query.end),
    ]
    conditions.extend(_get_filter_conditions(org_id, query.conditions))
    conditions.extend(extra_conditions)

    # Resolve each groupby field; "project" maps to project_id, tags resolve
    # through the indexer (unresolved keys are excluded from the groupby).
    groupby = {}
    for field in query.raw_groupby:
        if field == "project":
            groupby["project"] = Column("project_id")
            continue
        tag_id = _resolve(field)
        if tag_id is not None:
            groupby[field] = Column(f"tags[{tag_id}]")

    full_groupby = set(groupby.values()) - remove_groupby
    if series:
        full_groupby.add(Column(TS_COL_GROUP))

    return Query(
        dataset=Dataset.Metrics.value,
        match=Entity(entity_key.value),
        select=[Column(column) for column in columns],
        groupby=list(full_groupby),
        where=conditions,
        granularity=Granularity(query.rollup),
    )
def __counter_sum_aggregation_on_session_status_factory(session_status, metric_ids, alias=None):
    """sumIf over counter values restricted to one session.status and a metric ID set."""
    status_matches = Function(
        "equals",
        [
            Column(f"tags[{resolve_weak('session.status')}]"),
            resolve_weak(session_status),
        ],
    )
    metric_matches = Function("in", [Column("metric_id"), list(metric_ids)])
    return Function(
        "sumIf",
        [Column("value"), Function("and", [status_matches, metric_matches])],
        alias,
    )
def update(self, groupby: Collection[Column], snuba_rows: _SnubaData) -> None:
    """Capture the limiting groups from the first ("totals") result set.

    Subsequent calls are no-ops once initialized.
    """
    if self.initialized:
        return

    # Only "totals" queries may set the limiting conditions:
    assert Column(TS_COL_GROUP) not in groupby

    self._groupby = list(groupby)  # Make sure groupby has a fixed order
    self._groups = []
    for row in snuba_rows:
        self._groups.append(
            {column.name: row[column.name] for column in self._groupby}
        )
    self.initialized = True
def _get_entity_of_metric_name(projects: Sequence[Project], metric_name: str) -> EntityKey:
    """Find which entity (counter / set / distribution) holds data for a metric.

    Raises:
        InvalidParams: if the name does not resolve or no entity has data for it.
    """
    assert projects

    metric_id = indexer.resolve(metric_name)
    if metric_id is None:
        raise InvalidParams

    for metric_type in ("counter", "set", "distribution"):
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id")],
            referrer="snuba.metrics.meta.get_entity_of_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            return entity_key

    # Fix: message previously read "does not exit".
    raise InvalidParams(f"Raw metric {metric_name} does not exist")
def test_sessions_query(self) -> None:
    """A sessions SnQL query over an empty dataset returns 200 with no rows."""
    select_and_group = [Column("project_id"), Column("release")]
    conditions = [
        Condition(Column("project_id"), Op.IN, [self.project_id]),
        Condition(Column("org_id"), Op.EQ, self.org_id),
        Condition(
            Column("started"),
            Op.GTE,
            datetime(2021, 1, 1, 17, 5, 59, 554860),
        ),
        Condition(
            Column("started"),
            Op.LT,
            datetime(2022, 1, 1, 17, 6, 0, 554981),
        ),
    ]
    query = (
        Query("sessions", Entity("sessions"))
        .set_select(select_and_group)
        .set_groupby(select_and_group)
        .set_where(conditions)
        .set_orderby([OrderBy(Column("sessions"), Direction.DESC)])
        .set_limit(100)
    )

    response = self.post("/sessions/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200
    assert data["data"] == []
def test_tags_in_groupby(self) -> None:
    """Grouping by a tag column in a SnQL events query succeeds."""
    last_seen = Function("max", [Column("timestamp")], "last_seen")
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                last_seen,
            ]
        )
        .set_groupby([Column("tags[k8s-app]")])
        .set_where(
            [
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("tags[k8s-app]"), Op.NEQ, ""),
                Condition(Column("type"), Op.NEQ, "transaction"),
            ]
        )
        .set_orderby([OrderBy(last_seen, Direction.DESC)])
        .set_limit(1000)
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
def _resolve_apdex_function(
    self,
    _: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: Optional[str] = None,
) -> SelectType:
    """Apdex over transaction.duration: (satisfied + tolerable / 2) / total."""
    metric_true = indexer.resolve(constants.METRIC_TRUE_TAG_VALUE)
    if metric_true is None:
        # Nothing is satisfied or tolerated, the score must be 0
        return Function("toUInt64", [0], alias)

    satisfied = Function(
        "equals",
        [self.builder.column(constants.METRIC_SATISFIED_TAG_KEY), metric_true],
    )
    tolerable = Function(
        "equals",
        [self.builder.column(constants.METRIC_TOLERATED_TAG_KEY), metric_true],
    )
    metric_condition = Function(
        "equals",
        [Column("metric_id"), self.resolve_metric("transaction.duration")],
    )

    satisfied_count = self._resolve_count_if(metric_condition, satisfied)
    half_tolerable_count = Function(
        "divide", [self._resolve_count_if(metric_condition, tolerable), 2]
    )
    total_count = Function("countIf", [metric_condition])
    return Function(
        "divide",
        [Function("plus", [satisfied_count, half_tolerable_count]), total_count],
        alias,
    )
def get_changed_project_release_model_adoptions(
    self,
    project_ids: Sequence[ProjectId],
) -> Sequence[ProjectRelease]:
    """Return (project_id, release) pairs with session counters in the last 3 days."""
    now = datetime.now(pytz.utc)
    start = now - timedelta(days=3)

    if not list(project_ids):
        return []

    org_id = self._get_org_id(project_ids)
    release_column_name = tag_key(org_id, "release")

    # The same columns are selected and grouped on.
    query_cols = [Column("project_id"), Column(release_column_name)]
    query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity("metrics_counters"),
        select=query_cols,
        where=[
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id, "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, now),
        ],
        groupby=query_cols,
    )
    result = raw_snql_query(
        query,
        referrer="release_health.metrics.get_changed_project_release_model_adoptions",
        use_cache=False,
    )

    def extract_row_info(
        row: Mapping[str, Union[OrganizationId, str]]
    ) -> ProjectRelease:
        return row.get("project_id"), reverse_tag_value(
            org_id, row.get(release_column_name)
        )  # type: ignore

    return [extract_row_info(row) for row in result["data"]]
def check_releases_have_health_data(
    self,
    organization_id: OrganizationId,
    project_ids: Sequence[ProjectId],
    release_versions: Sequence[ReleaseName],
    start: datetime,
    end: datetime,
) -> Set[ReleaseName]:
    """Return the subset of release_versions that have session data in [start, end)."""
    release_column_name = tag_key(organization_id, "release")

    # Releases that never made it into the indexer cannot have data.
    releases_ids = []
    for release in release_versions:
        release_id = try_get_string_index(organization_id, release)
        if release_id is not None:
            releases_ids.append(release_id)

    query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity("metrics_counters"),
        select=[Column(release_column_name)],
        where=[
            Condition(Column("org_id"), Op.EQ, organization_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(
                Column("metric_id"), Op.EQ, metric_id(organization_id, "session")
            ),
            Condition(Column(release_column_name), Op.IN, releases_ids),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
        ],
        groupby=[Column(release_column_name)],
    )
    result = raw_snql_query(
        query,
        referrer="release_health.metrics.check_releases_have_health_data",
        use_cache=False,
    )

    def extract_row_info(
        row: Mapping[str, Union[OrganizationId, str]]
    ) -> ReleaseName:
        return reverse_tag_value(
            organization_id, row.get(release_column_name)
        )  # type: ignore

    return {extract_row_info(row) for row in result["data"]}
def _get_metrics_filter(
    self, metric_names: Optional[Sequence[str]]
) -> Optional[List[Condition]]:
    """Add a condition to filter by metrics. Return None if a name cannot be resolved."""
    if metric_names is None:
        return []

    metric_ids = []
    for name in metric_names:
        resolved = indexer.resolve(name)
        if resolved is None:
            # We are looking for tags that appear in all given metrics.
            # A tag cannot appear in a metric if the metric is not even indexed.
            return None
        metric_ids.append(resolved)

    return [Condition(Column("metric_id"), Op.IN, metric_ids)]
def test_arrayjoin(self) -> None:
    """array-joining exception_frames.filename and grouping by it returns 6 rows."""
    filename_col = Column("exception_frames.filename")
    last_seen = Function("max", [Column("timestamp")], "last_seen")
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                last_seen,
            ]
        )
        .set_groupby([filename_col])
        .set_array_join([filename_col])
        .set_where(
            [
                Condition(filename_col, Op.LIKE, "%.java"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]
        )
        .set_orderby([OrderBy(last_seen, Direction.DESC)])
        .set_limit(1000)
    )

    response = self.post("/events/snql", data=query.snuba())
    data = json.loads(response.data)
    assert response.status_code == 200, data
    assert len(data["data"]) == 6
def _to_column(
    query_func: SessionsQueryFunction,
    column_condition: SelectableExpression = 1,
) -> SelectableExpression:
    """
    Converts query a function into an expression that can be directly plugged into anywhere
    columns are used (like the select argument of a Query)
    """
    # Every aggregate is conditional on column_condition (defaults to constant 1,
    # i.e. unconditional).
    parameters = (Column("value"), column_condition)

    # distribution columns: all five percentiles come from one quantilesIf() call
    if query_func in _DURATION_PERCENTILES:
        return Function(
            alias="percentiles",
            function="quantilesIf(0.5,0.75,0.9,0.95,0.99)",
            parameters=parameters,
        )

    # remaining distribution / counter / set aggregates share one shape
    conditional_aggregates = {
        "avg(session.duration)": ("avg", "avgIf"),
        "max(session.duration)": ("max", "maxIf"),
        "sum(session)": ("sum", "sumIf"),
        "count_unique(user)": ("count_unique", "uniqIf"),
    }
    if query_func in conditional_aggregates:
        alias, function = conditional_aggregates[query_func]
        return Function(alias=alias, function=function, parameters=parameters)

    raise ValueError("Unmapped metrics column", query_func)
def _get_crash_free_rate_data(
    org_id: int,
    project_ids: Sequence[int],
    start: datetime,
    end: datetime,
    rollup: int,
) -> Dict[int, Dict[str, float]]:
    """Fetch per-project session counter values grouped by session.status."""
    session_status = tag_key(org_id, "session.status")

    count_query = Query(
        dataset=Dataset.Metrics.value,
        match=Entity(EntityKey.MetricsCounters.value),
        select=[Column("value")],
        where=[
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id, "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
        ],
        groupby=[Column("project_id"), Column(session_status)],
        granularity=Granularity(rollup),
    )
    count_data = raw_snql_query(
        count_query,
        referrer="release_health.metrics.get_crash_free_data",
        use_cache=False,
    )["data"]

    # project_id -> {session.status value -> count}
    data: Dict[int, Dict[str, float]] = {}
    for row in count_data:
        project_data = data.setdefault(row["project_id"], {})
        project_data[reverse_tag_value(org_id, row[session_status])] = row["value"]
    return data