def get_teams(request, organization, teams=None):
    # do normal teams lookup based on request params
    requested_teams = set(request.GET.getlist("team", [])) if teams is None else teams

    verified_ids = set()

    if "myteams" in requested_teams:
        requested_teams.remove("myteams")
        if is_active_superuser(request):
            # retrieve all teams within the organization
            myteams = Team.objects.filter(
                organization=organization, status=TeamStatus.VISIBLE
            ).values_list("id", flat=True)
            verified_ids.update(myteams)
        else:
            myteams = [t.id for t in request.access.teams]
            verified_ids.update(myteams)

    for team_id in requested_teams:
        # Verify each passed team id is numeric
        if type(team_id) is not int and not team_id.isdigit():
            raise InvalidParams(f"Invalid Team ID: {team_id}")
    requested_teams.update(verified_ids)

    teams_query = Team.objects.filter(id__in=requested_teams)
    for team in teams_query:
        if team.id in verified_ids:
            continue

        if not request.access.has_team_access(team):
            raise InvalidParams(
                f"Error: You do not have permission to access {team.name}",
            )

    return teams_query

def _parse_orderby(self, query_params):
    orderby = query_params.getlist("orderBy", [])
    if not orderby:
        return None
    elif len(orderby) > 1:
        raise InvalidParams("Only one 'orderBy' is supported")

    if len(self.fields) != 1:
        # If we were to allow multiple fields when `orderBy` is set,
        # we would require two snuba queries: one to get the sorted metric,
        # and one to get the fields that we are not currently sorting by.
        #
        # For example, the query
        #
        #   ?field=sum(foo)&field=sum(bar)&groupBy=tag1&orderBy=sum(foo)&limit=1
        #
        # with snuba entries (simplified)
        #
        #   | metric | tag1 | sum(value) |
        #   |--------|------|------------|
        #   | foo    | val1 |          2 |
        #   | foo    | val2 |          1 |
        #   | bar    | val1 |          3 |
        #   | bar    | val2 |          4 |
        #
        # would require a query (simplified)
        #
        #   SELECT sum(value) BY tag1 WHERE metric = foo ORDER BY sum(value)
        #
        # ->
        #
        #   {tag1: val2, sum(value): 1}
        #
        # and then
        #
        #   SELECT sum(value) BY metric, tag WHERE metric in [bar] and tag1 in [val2]
        #
        # to get the values for the other requested field(s).
        #
        # Since we do not have a requirement for ordered multi-field results (yet),
        # let's keep it simple and only allow a single field when `orderBy` is set.
        #
        raise InvalidParams("Cannot provide multiple 'field's when 'orderBy' is given")

    orderby = orderby[0]
    direction = Direction.ASC
    if orderby[0] == "-":
        orderby = orderby[1:]
        direction = Direction.DESC
    try:
        op, metric_name = self.fields[orderby]
    except KeyError:
        # orderBy one of the groupBy fields may be supported in the future
        raise InvalidParams("'orderBy' must be one of the provided 'fields'")

    if op in _OPERATIONS_PERCENTILES:
        # NOTE(jjbayer): This should work, will fix later
        raise InvalidParams("'orderBy' percentiles is not yet supported")

    return (op, metric_name), direction

def _parse_limit(self, query_params):
    limit = query_params.get("limit", None)
    if not self.orderby and limit:
        raise InvalidParams("'limit' is only supported in combination with 'orderBy'")

    if limit is not None:
        try:
            limit = int(limit)
            if limit < 1:
                raise ValueError
        except (ValueError, TypeError):
            raise InvalidParams("'limit' must be an integer >= 1")

    return limit

def get_date_range(params: Mapping) -> Tuple[datetime, datetime, int]:
    """Get start, end, rollup for the given parameters.

    Apply logic similar to `sessions_v2.get_constrained_date_range`, but with fewer
    constraints. More constraints may be added in the future.

    Note that this function returns a right-exclusive date range [start, end),
    contrary to the one used in sessions_v2.
    """
    interval = parse_stats_period(params.get("interval", "1h"))
    interval = int(3600 if interval is None else interval.total_seconds())

    # hard-code the minimum allowed resolution to 10 seconds
    allowed_resolution = AllowedResolution.ten_seconds
    smallest_interval, interval_str = allowed_resolution.value
    if interval % smallest_interval != 0 or interval < smallest_interval:
        raise InvalidParams(
            f"The interval has to be a multiple of the minimum interval of {interval_str}."
        )

    if ONE_DAY % interval != 0:
        raise InvalidParams("The interval should divide one day without a remainder.")

    start, end = get_date_range_from_params(params)

    date_range = end - start
    date_range = timedelta(
        seconds=int(interval * math.ceil(date_range.total_seconds() / interval))
    )

    if date_range.total_seconds() / interval > MAX_POINTS:
        raise InvalidParams(
            "Your interval and date range would create too many results. "
            "Use a larger interval, or a smaller date range."
        )

    end_ts = int(interval * math.ceil(to_timestamp(end) / interval))
    end = to_datetime(end_ts)
    start = end - date_range

    # NOTE: The sessions_v2 implementation cuts the `end` time to now + 1 minute
    # if `end` is in the future. This allows for better real-time results when
    # caching is enabled on the snuba queries. Removed here for simplicity,
    # but we might want to reconsider once caching becomes an issue for metrics.

    return start, end, interval

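# Illustrative sketch (not part of the original module): a self-contained demonstration of
# the snapping arithmetic performed by `get_date_range` for a 1-hour interval. The example
# timestamps below are made up for illustration; the real function additionally validates
# the interval and enforces MAX_POINTS.
import math
from datetime import datetime, timedelta, timezone

interval = 3600  # interval=1h, in seconds
raw_start = datetime(2021, 9, 1, 10, 17, tzinfo=timezone.utc)
raw_end = datetime(2021, 9, 1, 13, 42, tzinfo=timezone.utc)

# The raw range (3h25m) is rounded up to a whole number of intervals (4h) ...
date_range = timedelta(
    seconds=interval * math.ceil((raw_end - raw_start).total_seconds() / interval)
)

# ... and `end` is rounded up to the next interval boundary (14:00), so the returned
# right-exclusive range is [10:00, 14:00).
end_ts = interval * math.ceil(raw_end.timestamp() / interval)
snapped_end = datetime.fromtimestamp(end_ts, tz=timezone.utc)
snapped_start = snapped_end - date_range
assert (snapped_start.hour, snapped_end.hour) == (10, 14)
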
def _get_metric(metric_name: str) -> dict:
    try:
        metric = _METRICS[metric_name]
    except KeyError:
        raise InvalidParams(f"Unknown metric '{metric_name}'")

    return metric

def _get_tag_values(cls, metric_name: str, tag_name: str) -> List[str]:
    metric = _get_metric(metric_name)
    try:
        tags = metric["tags"][tag_name]
    except KeyError:
        raise InvalidParams(f"Unknown tag '{tag_name}'")

    return tags

def _validate_series_limit(self, query_params):
    if self.limit:
        if (self.end - self.start).total_seconds() / self.rollup * self.limit > MAX_POINTS:
            raise InvalidParams(
                f"Requested interval of {query_params.get('interval', '1h')} with statsPeriod of "
                f"{query_params.get('statsPeriod')} is too granular for a per_page of "
                f"{self.limit} elements. Increase your interval, decrease your statsPeriod, "
                f"or decrease your per_page parameter."
            )

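# Illustrative sketch (not from the original module) of the point-count arithmetic behind
# the check above. MAX_POINTS is assumed here to be 10_000 purely for demonstration.
ASSUMED_MAX_POINTS = 10_000
period_seconds = 90 * 24 * 3600  # statsPeriod=90d
rollup = 3600                    # interval=1h
per_page = 10
# 2160 intervals * 10 groups per page = 21_600 series points, which would exceed the
# assumed limit, so such a request would be rejected with the error message above.
assert period_seconds / rollup * per_page > ASSUMED_MAX_POINTS
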
def _parse_orderby(self, query_params):
    orderby = query_params.getlist("orderBy", [])
    if not orderby:
        return None
    elif len(orderby) > 1:
        raise InvalidParams("Only one 'orderBy' is supported")

    orderby = orderby[0]
    direction = Direction.ASC
    if orderby[0] == "-":
        orderby = orderby[1:]
        direction = Direction.DESC
    try:
        op, metric_name = self.fields[orderby]
    except KeyError:
        # orderBy one of the groupBy fields may be supported in the future
        raise InvalidParams("'orderBy' must be one of the provided 'fields'")

    return (op, metric_name), direction

def _parse_offset(self, query_params, paginator_kwargs):
    if self.orderby:
        return paginator_kwargs.get("offset")
    else:
        cursor = query_params.get("cursor")
        if cursor is not None:
            # Without an `orderBy`, the request results in a `series` query, which cannot
            # be paginated, so passing a `cursor` to paginate the results is not possible
            raise InvalidParams("'cursor' is only supported in combination with 'orderBy'")
        return None

def parse_query(query_string: str) -> Sequence[Condition]:
    """Parse given filter query into a list of snuba conditions"""
    # HACK: Parse a sessions query, validate / transform afterwards.
    # We will want to write our own grammar + interpreter for this later.
    try:
        query_filter = QueryFilter(
            Dataset.Sessions,
            params={
                "project_id": 0,
            },
        )
        where, _ = query_filter.resolve_conditions(query_string, use_aggregate_conditions=True)
    except InvalidSearchQuery as e:
        raise InvalidParams(f"Failed to parse query: {e}")

    return where

def parse_query(query_string: str) -> Sequence[Condition]:
    """Parse given filter query into a list of snuba conditions"""
    # HACK: Parse a sessions query, validate / transform afterwards.
    # We will want to write our own grammar + interpreter for this later.
    # TODO(ahmed): Check against `session.status`, which was decided not to be supported
    try:
        query_builder = UnresolvedQuery(
            Dataset.Sessions,
            params={
                "project_id": 0,
            },
        )
        where, _ = query_builder.resolve_conditions(query_string, use_aggregate_conditions=True)
    except InvalidSearchQuery as e:
        raise InvalidParams(f"Failed to parse query: {e}")

    return where

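# Illustrative sketch (not part of the original module): conceptually, a filter string such
# as 'release:foo environment:production' is resolved into a list of snuba `Condition`
# objects that can be appended to a query's WHERE clause. The exact columns and operators
# are produced by the sessions query builder, so the conditions constructed below are only
# an assumed approximation of its output.
from snuba_sdk import Column, Condition, Op

example_where = [
    Condition(Column("release"), Op.EQ, "foo"),
    Condition(Column("environment"), Op.EQ, "production"),
]
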
def get_single_metric(
    self, projects: Sequence[Project], metric_name: str
) -> MetricMetaWithTagKeys:
    """Get metadata for a single metric, without tag values"""
    try:
        metric = _METRICS[metric_name]
    except KeyError:
        raise InvalidParams()

    return dict(
        name=metric_name,
        **{
            # Only return tag names
            key: (sorted(value.keys()) if key == "tags" else value)
            for key, value in metric.items()
        },
    )

def get_single_metric_info(projects: Sequence[Project], metric_name: str) -> MetricMetaWithTagKeys:
    assert projects

    metric_id = indexer.resolve(metric_name)

    if metric_id is None:
        raise InvalidParams

    for metric_type in ("counter", "set", "distribution"):
        # TODO: What if metric_id exists for multiple types / units?
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column("tags.key")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id"), Column("tags.key")],
            referrer="snuba.metrics.meta.get_single_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            tag_ids = {tag_id for row in data for tag_id in row["tags.key"]}
            return {
                "name": metric_name,
                "type": metric_type,
                "operations": AVAILABLE_OPERATIONS[entity_key.value],
                "tags": sorted(
                    ({"key": reverse_resolve(tag_id)} for tag_id in tag_ids),
                    key=itemgetter("key"),
                ),
                "unit": None,
            }

    raise InvalidParams(f"Raw metric {metric_name} does not exist")

def get_tag_values(
    projects: Sequence[Project], tag_name: str, metric_names: Optional[Sequence[str]]
) -> Sequence[TagValue]:
    """Get all known values for a specific tag"""
    assert projects

    tag_id = indexer.resolve(tag_name)
    if tag_id is None:
        raise InvalidParams(f"Tag {tag_name} is not available in the indexer")

    try:
        tags, _ = _fetch_tags_or_values_per_ids(
            projects=projects,
            column=f"tags[{tag_id}]",
            metric_names=metric_names,
            referrer="snuba.metrics.meta.get_tag_values",
        )
    except InvalidParams:
        return []

    return tags

def _generate_series(self, fields: dict, intervals: List[datetime]) -> dict:
    series = {}
    totals = {}
    for field, (operation, metric_name) in fields.items():
        metric = _get_metric(metric_name)

        if operation not in metric["operations"]:
            raise InvalidParams(f"Invalid operation '{operation}' for metric '{metric_name}'")

        mu = 1000 * random.random()
        series[field] = [random.normalvariate(mu, 50) for _ in intervals]

        if operation == "count_unique":
            series[field] = list(map(int, series[field]))

        totals[field] = self._operations[operation](series[field])

    return {
        "totals": totals,
        "series": series,
    }

def _get_entity_of_metric_name(projects: Sequence[Project], metric_name: str) -> EntityKey:
    assert projects

    metric_id = indexer.resolve(metric_name)

    if metric_id is None:
        raise InvalidParams

    for metric_type in ("counter", "set", "distribution"):
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id")],
            referrer="snuba.metrics.meta.get_entity_of_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            return entity_key

    raise InvalidParams(f"Raw metric {metric_name} does not exist")

def get_series(projects: Sequence[Project], query: QueryDefinition) -> dict:
    """Get time series for the given query"""
    intervals = list(get_intervals(query))
    results = {}

    if not query.groupby:
        # When no groupBy columns are specified, we don't want to run an initial query
        # first to get the groups, because there are no groups: everything collapses into
        # a single group, which is basically identical to eliminating the orderBy altogether
        query.orderby = None

    if query.orderby is not None:
        # TODO(ahmed): Now that we have conditional aggregates as select statements, we might
        # be able to shave off a query here. We only need the other queries for fields
        # spanning other entities; if all the fields belong to one entity, there is no need.

        # There is a known limitation: since we make two queries, using the results of the
        # first query to filter down the results of the second, if the field used to order by
        # has no values for certain transactions (for example in the case of the performance
        # table), we might end up showing fewer transactions than there actually are when
        # ordering by it. We are limited by the rows available for the field used in the
        # orderBy.

        # Multi-field select with order by functionality. Currently only supports the
        # performance table.
        original_query_fields = copy(query.fields)

        # The initial query has to contain only one field, which is the same as the
        # orderBy field
        orderby_field = [
            key for key, value in query.fields.items() if value == query.orderby[0]
        ][0]
        query.fields = {orderby_field: parse_field(orderby_field)}

        snuba_queries = SnubaQueryBuilder(projects, query).get_snuba_queries()
        if len(snuba_queries) > 1:
            # Currently, accepting an order by field that spans multiple entities is not
            # supported, but it might change in the future. Even then, it might be better
            # handled on the snuba side of things
            raise InvalidParams(
                "Order by queries over multiple entities are not supported in "
                "multi-field select with order by clause queries"
            )

        try:
            # This query contains an order by clause, and so we are only interested in the
            # "totals" query
            initial_snuba_query = next(iter(snuba_queries.values()))["totals"]

            initial_query_results = raw_snql_query(
                initial_snuba_query, use_cache=False, referrer="api.metrics.totals.initial_query"
            )["data"]
        except StopIteration:
            # This can occur when requesting a list of derived metrics that have no data
            # for the passed projects
            initial_query_results = []

        # If we do not get any results from the first query, then there is no point in making
        # the second query
        if initial_query_results:
            # We no longer want the order by in the 2nd query because we already have the
            # order of the group by tags from the first query, so we remove the order by
            # columns, and reset the query fields to the original fields because in the
            # second query we want to query for all the metrics in the request api call
            query.orderby = None
            query.fields = original_query_fields

            snuba_queries = SnubaQueryBuilder(projects, query).get_snuba_queries()

            # Translate the groupBy fields of the query into their tag keys because these
            # fields will be used to filter down and order the results of the 2nd query.
            # For example, (project_id, transaction) is translated to (project_id, tags[3])
            groupby_tags = tuple(
                resolve_tag_key(field) if field not in ALLOWED_GROUPBY_COLUMNS else field
                for field in query.groupby
            )

            # Dictionary that contains the conditions that are required to be added to the
            # where clause of the second query. In addition to filtering down on the tuple
            # combination of the fields in the group by columns, we need a separate condition
            # for each of the columns in the group by with their respective values so
            # ClickHouse can filter the results down before checking for the group by column
            # combinations.
            ordered_tag_conditions = {
                col: list({data_elem[col] for data_elem in initial_query_results})
                for col in groupby_tags
            }
            ordered_tag_conditions[groupby_tags] = [
                tuple(data_elem[col] for col in groupby_tags)
                for data_elem in initial_query_results
            ]

            for entity, queries in snuba_queries.items():
                results.setdefault(entity, {})
                # This loop has constant time complexity as it will always have a maximum of
                # three queries corresponding to the three available entities
                # ["metrics_sets", "metrics_distributions", "metrics_counters"]
                for key, snuba_query in queries.items():
                    results[entity].setdefault(key, {"data": []})

                    # If the query is grouped by project_id, then we should remove the
                    # original project_id condition because it might be more relaxed than
                    # the project_id condition in the second query
                    where = []
                    for condition in snuba_query.where:
                        if not (
                            isinstance(condition.lhs, Column)
                            and condition.lhs.name == "project_id"
                            and "project_id" in groupby_tags
                        ):
                            where += [condition]

                    # Add the conditions obtained from the previous query
                    for condition_key, condition_value in ordered_tag_conditions.items():
                        if not condition_key or not condition_value:
                            # Safeguard to prevent adding empty conditions to the where clause
                            continue

                        lhs_condition = (
                            Function("tuple", [Column(col) for col in condition_key])
                            if isinstance(condition_key, tuple)
                            else Column(condition_key)
                        )
                        where += [
                            Condition(lhs_condition, Op.IN, Function("tuple", condition_value))
                        ]
                    snuba_query = snuba_query.set_where(where)

                    # Set the limit of the second query to the provided limit multiplied by
                    # the number of the metrics requested in the query in this specific entity
                    snuba_query = snuba_query.set_limit(
                        snuba_query.limit.limit * len(snuba_query.select)
                    )
                    snuba_query = snuba_query.set_offset(0)

                    snuba_query_res = raw_snql_query(
                        snuba_query, use_cache=False, referrer=f"api.metrics.{key}.second_query"
                    )
                    # Create a dictionary keyed by the ordered-by tuples from the initial
                    # query, so that we are able to order it easily in the next code block.
                    # If, for example, we are grouping by (project_id, transaction), then
                    # this logic will output a dictionary that looks something like the
                    # following, where `tags[1]` represents transaction:
                    # {
                    #     (3, 2): [{"metric_id": 4, "project_id": 3, "tags[1]": 2, "p50": [11.0]}],
                    #     (3, 3): [{"metric_id": 4, "project_id": 3, "tags[1]": 3, "p50": [5.0]}],
                    # }
                    snuba_query_data_dict = {}
                    for data_elem in snuba_query_res["data"]:
                        snuba_query_data_dict.setdefault(
                            tuple(data_elem[col] for col in groupby_tags), []
                        ).append(data_elem)

                    # Order the results according to the results of the initial query, so
                    # that when the results dict is passed on to `SnubaResultConverter`, it
                    # comes out ordered.
                    # Ordered conditions might for example look something like this:
                    # {..., ('project_id', 'tags[1]'): [(3, 3), (3, 2)]}, then we end up with
                    # {
                    #     "totals": {
                    #         "data": [
                    #             {"metric_id": 5, "project_id": 3, "tags[1]": 3, "count_unique": 5},
                    #             {"metric_id": 5, "project_id": 3, "tags[1]": 2, "count_unique": 1},
                    #         ]
                    #     }
                    # }
                    for group_tuple in ordered_tag_conditions[groupby_tags]:
                        results[entity][key]["data"] += snuba_query_data_dict.get(group_tuple, [])
    else:
        snuba_queries = SnubaQueryBuilder(projects, query).get_snuba_queries()
        for entity, queries in snuba_queries.items():
            results.setdefault(entity, {})
            for key, snuba_query in queries.items():
                if snuba_query is None:
                    continue

                results[entity][key] = raw_snql_query(
                    snuba_query, use_cache=False, referrer=f"api.metrics.{key}"
                )

    assert projects
    converter = SnubaResultConverter(projects[0].organization_id, query, intervals, results)

    return {
        "start": query.start,
        "end": query.end,
        "query": query.query,
        "intervals": intervals,
        "groups": converter.translate_results(),
    }

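# Hypothetical example (an assumption, not taken from the original module) of the response
# shape produced by `get_series`. The keys inside "groups" come from `SnubaResultConverter`,
# and the metric names, tag values, and numbers below are invented purely for illustration.
from datetime import datetime, timezone

example_get_series_response = {
    "start": datetime(2021, 9, 1, 10, 0, tzinfo=timezone.utc),
    "end": datetime(2021, 9, 1, 14, 0, tzinfo=timezone.utc),
    "query": "release:foo",
    "intervals": [datetime(2021, 9, 1, 10 + i, 0, tzinfo=timezone.utc) for i in range(4)],
    "groups": [
        {
            "by": {"transaction": "/api/0/foo"},
            "totals": {"sum(sentry.sessions.session)": 400},
            "series": {"sum(sentry.sessions.session)": [100, 80, 120, 100]},
        },
    ],
}
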
def _validate_metric_names(cls, metric_names):
    unknown_metric_names = set(metric_names) - _METRICS.keys()
    if unknown_metric_names:
        raise InvalidParams(f"Unknown metrics '{', '.join(unknown_metric_names)}'")

    return metric_names

def _fetch_tags_or_values_per_ids(
    projects: Sequence[Project],
    metric_names: Optional[Sequence[str]],
    referrer: str,
    column: str,
) -> Tuple[Union[Sequence[Tag], Sequence[TagValue]], Optional[str]]:
    """
    Takes as input projects, metric_names, and a column and, based on the column selection,
    returns either tags or tag values for the combination of projects and metric_names
    selected. If no metric_names are passed, it returns essentially all the tags or tag
    values available for those projects. In addition, when exactly one metric name is passed
    in metric_names, the type (i.e. the mapping to the entity) is also returned.
    """
    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        raise InvalidParams(
            f"Some or all of the metric names in {metric_names} do not exist in the indexer"
        )
    else:
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_or_value_ids_per_metric_id = defaultdict(list)
    # This dictionary is required as a mapping from an entity to the ids available in it, to
    # validate that constituent metrics of a SingularEntityDerivedMetric actually span a
    # single entity by validating that the ids of the constituent metrics all lie in the
    # same entity
    supported_metric_ids_in_entities = {}

    for metric_type in ("counter", "set", "distribution"):
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column(column)],
            where=where,
            groupby=[Column("metric_id"), Column(column)],
            referrer=referrer,
            projects=projects,
            org_id=projects[0].organization_id,
        )

        for row in rows:
            metric_id = row["metric_id"]
            if column.startswith("tags["):
                value_id = row[column]
                if value_id > 0:
                    tag_or_value_ids_per_metric_id[metric_id].append(value_id)
            else:
                tag_or_value_ids_per_metric_id[metric_id].extend(row[column])
            supported_metric_ids_in_entities.setdefault(metric_type, []).append(row["metric_id"])

    # If we get no results back from snuba, then raise an InvalidParams with an appropriate
    # error message
    if not tag_or_value_ids_per_metric_id:
        if metric_names:
            error_str = f"The following metrics {metric_names} do not exist in the dataset"
        else:
            error_str = "Dataset contains no metric data for your project selection"
        raise InvalidParams(error_str)

    tag_or_value_id_lists = tag_or_value_ids_per_metric_id.values()
    if metric_names:
        # If there are metric_ids that map to the metric_names provided as an arg that were
        # not found in the dataset, then we raise an InvalidParams exception
        if metric_ids != set(tag_or_value_ids_per_metric_id.keys()):
            # This can occur for metric names that don't have an equivalent in the dataset.
            raise InvalidParams(
                f"Not all the requested metrics or the constituent metrics in {metric_names} have "
                f"data in the dataset"
            )

        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset, and now we need to check that all derived metrics requested
        # (if any) are set up correctly, i.e. the constituents of a
        # SingularEntityDerivedMetric actually span a single entity
        _validate_requested_derived_metrics_in_input_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )

        # Only return tags/tag values that occur in all metrics
        tag_or_value_ids = set.intersection(*map(set, tag_or_value_id_lists))
    else:
        tag_or_value_ids = {tag_id for ids in tag_or_value_id_lists for tag_id in ids}

    if column.startswith("tags["):
        tag_id = column.split("tags[")[1].split("]")[0]
        tags_or_values = [
            {"key": reverse_resolve(int(tag_id)), "value": reverse_resolve(value_id)}
            for value_id in tag_or_value_ids
        ]
        tags_or_values.sort(key=lambda tag: (tag["key"], tag["value"]))
    else:
        tags_or_values = [{"key": reverse_resolve(tag_id)} for tag_id in tag_or_value_ids]
        tags_or_values.sort(key=itemgetter("key"))

    if metric_names and len(metric_names) == 1:
        metric_type = list(supported_metric_ids_in_entities.keys())[0]
        return tags_or_values, metric_type
    return tags_or_values, None

def resolve_tag_key(string: str) -> str:
    resolved = indexer.resolve(string)
    if resolved is None:
        raise InvalidParams(f"Unknown tag key: '{string}'")

    return f"tags[{resolved}]"

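# Illustrative sketch (assumed indexer mapping, not from the original module): if the
# indexer resolves the tag key "environment" to the integer id 42, then
# resolve_tag_key("environment") returns "tags[42]", the column name expected by the
# metrics dataset. The helper below only mirrors the formatting step for demonstration.
def _format_tag_key(resolved_id: int) -> str:
    # Same f-string formatting as used by `resolve_tag_key` above.
    return f"tags[{resolved_id}]"

assert _format_tag_key(42) == "tags[42]"
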