Example #1
def get_teams(request, organization, teams=None):
    # do normal teams lookup based on request params
    requested_teams = set(request.GET.getlist("team", [])) if teams is None else teams

    verified_ids = set()

    if "myteams" in requested_teams:
        requested_teams.remove("myteams")
        if is_active_superuser(request):
            # retrieve all teams within the organization
            myteams = Team.objects.filter(
                organization=organization,
                status=TeamStatus.VISIBLE).values_list("id", flat=True)
            verified_ids.update(myteams)
        else:
            myteams = [t.id for t in request.access.teams]
            verified_ids.update(myteams)

    # Verify that each remaining team ID is numeric
    for team_id in requested_teams:
        if type(team_id) is not int and not team_id.isdigit():
            raise InvalidParams(f"Invalid Team ID: {team_id}")
    requested_teams.update(verified_ids)

    teams_query = Team.objects.filter(id__in=requested_teams)
    for team in teams_query:
        if team.id in verified_ids:
            continue

        if not request.access.has_team_access(team):
            raise InvalidParams(
                f"Error: You do not have permission to access {team.name}"
            )

    return teams_query
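A standalone sketch of the ID-validation step above, with made-up inputs; in the real code "myteams" is expanded through `request.access` before the numeric check runs.

requested_teams = {"12", "34", "myteams"}
requested_teams.discard("myteams")  # expanded separately in get_teams
assert all(t.isdigit() for t in requested_teams)  # otherwise InvalidParams is raised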
Example #2
    def _parse_orderby(self, query_params):
        orderby = query_params.getlist("orderBy", [])
        if not orderby:
            return None
        elif len(orderby) > 1:
            raise InvalidParams("Only one 'orderBy' is supported")

        if len(self.fields) != 1:
            # If we were to allow multiple fields when `orderBy` is set,
            # we would require two snuba queries: one to get the sorted metric,
            # and one to get the fields that we are not currently sorting by.
            #
            # For example, the query
            #
            #   ?field=sum(foo)&field=sum(bar)&groupBy=tag1&orderBy=sum(foo)&limit=1
            #
            # with snuba entries (simplified)
            #
            #   | metric | tag1 | sum(value) |
            #   |----------------------------|
            #   | foo    | val1 |          2 |
            #   | foo    | val2 |          1 |
            #   | bar    | val1 |          3 |
            #   | bar    | val2 |          4 |
            #
            # Would require a query (simplified)
            #
            #   SELECT sum(value) BY tag1 WHERE metric = foo ORDER BY sum(value)
            #
            # ->
            #
            #   {tag1: val2, sum(value): 1}
            #
            # and then
            #
            #   SELECT sum(value) BY metric, tag1 WHERE metric in [bar] and tag1 in [val2]
            #
            # to get the values for the other requested field(s).
            #
            # Since we do not have a requirement for ordered multi-field results (yet),
            # let's keep it simple and only allow a single field when `orderBy` is set.
            #
            raise InvalidParams("Cannot provide multiple 'field's when 'orderBy' is given")
        orderby = orderby[0]
        direction = Direction.ASC
        if orderby[0] == "-":
            orderby = orderby[1:]
            direction = Direction.DESC
        try:
            op, metric_name = self.fields[orderby]
        except KeyError:
            # Ordering by one of the groupBy fields may be supported in the future
            raise InvalidParams("'orderBy' must be one of the provided 'fields'")

        if op in _OPERATIONS_PERCENTILES:
            # NOTE(jjbayer): This should work, will fix later
            raise InvalidParams("'orderBy' percentiles is not yet supported")

        return (op, metric_name), direction
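A minimal, self-contained sketch of the direction parsing above; `Direction` here is a stand-in enum for the real one.

from enum import Enum

class Direction(Enum):  # stand-in for the real Direction enum
    ASC = "asc"
    DESC = "desc"

def split_direction(orderby: str):
    """Strip a leading '-' and map it to a sort direction, as above."""
    if orderby.startswith("-"):
        return orderby[1:], Direction.DESC
    return orderby, Direction.ASC

assert split_direction("-sum(foo)") == ("sum(foo)", Direction.DESC)
assert split_direction("sum(bar)") == ("sum(bar)", Direction.ASC)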
Example #3
    def _parse_limit(self, query_params):
        limit = query_params.get("limit", None)
        if not self.orderby and limit:
            raise InvalidParams("'limit' is only supported in combination with 'orderBy'")

        if limit is not None:
            try:
                limit = int(limit)
                if limit < 1:
                    raise ValueError
            except (ValueError, TypeError):
                raise InvalidParams("'limit' must be integer >= 1")

        return limit
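The same validation in isolation, with a local `InvalidParams` stand-in, showing which values pass:

class InvalidParams(Exception):  # stand-in for the real exception
    pass

def parse_limit(raw):
    if raw is None:
        return None
    try:
        limit = int(raw)
        if limit < 1:
            raise ValueError
    except (ValueError, TypeError):
        raise InvalidParams("'limit' must be an integer >= 1")
    return limit

assert parse_limit("10") == 10
for bad in ("0", "-3", "1.5", "abc"):
    try:
        parse_limit(bad)
        assert False, "should have raised"
    except InvalidParams:
        pass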
Example #4
def get_date_range(params: Mapping) -> Tuple[datetime, datetime, int]:
    """Get start, end, rollup for the given parameters.

    Apply a similar logic as `sessions_v2.get_constrained_date_range`,
    but with fewer constraints. More constraints may be added in the future.

    Note that this function returns a right-exclusive date range [start, end),
    contrary to the one used in sessions_v2.

    """
    interval = parse_stats_period(params.get("interval", "1h"))
    interval = int(3600 if interval is None else interval.total_seconds())

    # Hard-code the minimum allowed resolution to 10 seconds
    allowed_resolution = AllowedResolution.ten_seconds

    smallest_interval, interval_str = allowed_resolution.value
    if interval % smallest_interval != 0 or interval < smallest_interval:
        raise InvalidParams(
            f"The interval has to be a multiple of the minimum interval of {interval_str}."
        )

    if ONE_DAY % interval != 0:
        raise InvalidParams(
            "The interval should divide one day without a remainder.")

    start, end = get_date_range_from_params(params)

    date_range = end - start

    date_range = timedelta(
        seconds=int(interval * math.ceil(date_range.total_seconds() / interval))
    )

    if date_range.total_seconds() / interval > MAX_POINTS:
        raise InvalidParams(
            "Your interval and date range would create too many results. "
            "Use a larger interval, or a smaller date range.")

    end_ts = int(interval * math.ceil(to_timestamp(end) / interval))
    end = to_datetime(end_ts)
    start = end - date_range

    # NOTE: The sessions_v2 implementation cuts the `end` time to now + 1 minute
    # if `end` is in the future. This allows for better real time results when
    # caching is enabled on the snuba queries. Removed here for simplicity,
    # but we might want to reconsider once caching becomes an issue for metrics.

    return start, end, interval
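A worked example (hypothetical timestamps) of the rounding above: a 90-minute range at a 1-hour interval widens to two hours, and `end` snaps to the next interval boundary.

import math
from datetime import datetime, timedelta, timezone

interval = 3600  # 1 hour
start = datetime(2021, 1, 1, 10, 15, tzinfo=timezone.utc)
end = datetime(2021, 1, 1, 11, 45, tzinfo=timezone.utc)

date_range = end - start  # 90 minutes
date_range = timedelta(
    seconds=int(interval * math.ceil(date_range.total_seconds() / interval))
)
assert date_range == timedelta(hours=2)

end_ts = int(interval * math.ceil(end.timestamp() / interval))
end = datetime.fromtimestamp(end_ts, tz=timezone.utc)  # 12:00, the next boundary
start = end - date_range                               # 10:00
assert (start.hour, end.hour) == (10, 12)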
Example #5
def _get_metric(metric_name: str) -> dict:
    try:
        metric = _METRICS[metric_name]
    except KeyError:
        raise InvalidParams(f"Unknown metric '{metric_name}'")

    return metric
Example #6
    def _get_tag_values(cls, metric_name: str, tag_name: str) -> List[str]:
        metric = _get_metric(metric_name)
        try:
            tags = metric["tags"][tag_name]
        except KeyError:
            raise InvalidParams(f"Unknown tag '{tag_name}'")

        return tags
Example #7
    def _validate_series_limit(self, query_params):
        if self.limit:
            if (self.end - self.start).total_seconds() / self.rollup * self.limit > MAX_POINTS:
                raise InvalidParams(
                    f"Requested interval of {query_params.get('interval', '1h')} with statsPeriod of "
                    f"{query_params.get('statsPeriod')} is too granular for a per_page of "
                    f"{self.limit} elements. Increase your interval, decrease your statsPeriod, "
                    f"or decrease your per_page parameter."
                )
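Numeric sketch of the guard above; `MAX_POINTS = 10000` is assumed here for illustration only. A 24-hour window at a 10-second rollup already yields 8640 buckets per series, so even a per_page of 2 trips the limit.

MAX_POINTS = 10000  # assumed value, for illustration only
window_seconds, rollup, limit = 24 * 3600, 10, 2
points = window_seconds / rollup * limit  # 17280.0
assert points > MAX_POINTS  # -> InvalidParams in _validate_series_limit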
Example #8
    def _parse_orderby(self, query_params):
        orderby = query_params.getlist("orderBy", [])
        if not orderby:
            return None
        elif len(orderby) > 1:
            raise InvalidParams("Only one 'orderBy' is supported")

        orderby = orderby[0]
        direction = Direction.ASC
        if orderby[0] == "-":
            orderby = orderby[1:]
            direction = Direction.DESC
        try:
            op, metric_name = self.fields[orderby]
        except KeyError:
            # Ordering by one of the groupBy fields may be supported in the future
            raise InvalidParams("'orderBy' must be one of the provided 'fields'")

        return (op, metric_name), direction
Example #9
    def _parse_offset(self, query_params, paginator_kwargs):
        if self.orderby:
            return paginator_kwargs.get("offset")
        else:
            cursor = query_params.get("cursor")
            if cursor is not None:
                # Without an orderBy we run a `series` query, which cannot be
                # paginated, so passing a `cursor` url param to paginate the
                # results is not possible
                raise InvalidParams("'cursor' is only supported in combination with 'orderBy'")
            return None
Example #10
def parse_query(query_string: str) -> Sequence[Condition]:
    """Parse given filter query into a list of snuba conditions"""
    # HACK: Parse a sessions query, validate / transform afterwards.
    # We will want to write our own grammar + interpreter for this later.
    try:
        query_filter = QueryFilter(
            Dataset.Sessions,
            params={
                "project_id": 0,
            },
        )
        where, _ = query_filter.resolve_conditions(query_string, use_aggregate_conditions=True)
    except InvalidSearchQuery as e:
        raise InvalidParams(f"Failed to parse query: {e}")

    return where
Example #11
def parse_query(query_string: str) -> Sequence[Condition]:
    """Parse given filter query into a list of snuba conditions"""
    # HACK: Parse a sessions query, validate / transform afterwards.
    # We will want to write our own grammar + interpreter for this later.
    # Todo(ahmed): Check against `session.status`, which it was decided not to support
    try:
        query_builder = UnresolvedQuery(
            Dataset.Sessions,
            params={
                "project_id": 0,
            },
        )
        where, _ = query_builder.resolve_conditions(query_string, use_aggregate_conditions=True)
    except InvalidSearchQuery as e:
        raise InvalidParams(f"Failed to parse query: {e}")

    return where
Example #12
    def get_single_metric(
        self, projects: Sequence[Project], metric_name: str
    ) -> MetricMetaWithTagKeys:
        """Get metadata for a single metric, without tag values"""
        try:
            metric = _METRICS[metric_name]
        except KeyError:
            raise InvalidParams(f"Unknown metric '{metric_name}'")

        return dict(
            name=metric_name,
            **{
                # Only return tag names
                key: (sorted(value.keys()) if key == "tags" else value)
                for key, value in metric.items()
            },
        )
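The dict comprehension above only strips tag values; a sketch with a made-up metric entry:

metric = {"type": "counter", "operations": ["sum"], "tags": {"release": [1, 2]}}
meta = dict(
    name="session",
    **{key: (sorted(value.keys()) if key == "tags" else value) for key, value in metric.items()},
)
assert meta["tags"] == ["release"]   # tag names only, values dropped
assert meta["type"] == "counter"     # everything else passes through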
Example #13
def get_single_metric_info(projects: Sequence[Project],
                           metric_name: str) -> MetricMetaWithTagKeys:
    assert projects

    metric_id = indexer.resolve(metric_name)

    if metric_id is None:
        raise InvalidParams(f"Unknown metric '{metric_name}'")

    for metric_type in ("counter", "set", "distribution"):
        # TODO: What if metric_id exists for multiple types / units?
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"),
                    Column("tags.key")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id"),
                     Column("tags.key")],
            referrer="snuba.metrics.meta.get_single_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            tag_ids = {tag_id for row in data for tag_id in row["tags.key"]}
            return {
                "name": metric_name,
                "type": metric_type,
                "operations": AVAILABLE_OPERATIONS[entity_key.value],
                "tags": sorted(
                    ({"key": reverse_resolve(tag_id)} for tag_id in tag_ids),
                    key=itemgetter("key"),
                ),
                "unit": None,
            }

    raise InvalidParams(f"Raw metric {metric_name} does not exit")
Example #14
def get_tag_values(
    projects: Sequence[Project], tag_name: str, metric_names: Optional[Sequence[str]]
) -> Sequence[TagValue]:
    """Get all known values for a specific tag"""
    assert projects

    tag_id = indexer.resolve(tag_name)
    if tag_id is None:
        raise InvalidParams(f"Tag {tag_name} is not available in the indexer")

    try:
        tags, _ = _fetch_tags_or_values_per_ids(
            projects=projects,
            column=f"tags[{tag_id}]",
            metric_names=metric_names,
            referrer="snuba.metrics.meta.get_tag_values",
        )
    except InvalidParams:
        return []
    return tags
Example #15
    def _generate_series(self, fields: dict, intervals: List[datetime]) -> dict:
        series = {}
        totals = {}
        for field, (operation, metric_name) in fields.items():

            metric = _get_metric(metric_name)

            if operation not in metric["operations"]:
                raise InvalidParams(f"Invalid operation '{operation}' for metric '{metric_name}'")

            mu = 1000 * random.random()
            series[field] = [random.normalvariate(mu, 50) for _ in intervals]

            if operation == "count_unique":
                series[field] = list(map(int, series[field]))

            totals[field] = self._operations[operation](series[field])

        return {
            "totals": totals,
            "series": series,
        }
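A sketch of the mock data generation above: normal variates around a random mean, coerced to ints when the operation is count_unique.

import random

mu = 1000 * random.random()
series = [random.normalvariate(mu, 50) for _ in range(3)]
series = list(map(int, series))  # applied when operation == "count_unique"
assert all(isinstance(v, int) for v in series)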
Example #16
def _get_entity_of_metric_name(projects: Sequence[Project],
                               metric_name: str) -> EntityKey:
    assert projects

    metric_id = indexer.resolve(metric_name)

    if metric_id is None:
        raise InvalidParams(f"Unknown metric '{metric_name}'")

    for metric_type in ("counter", "set", "distribution"):
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id")],
            referrer="snuba.metrics.meta.get_entity_of_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            return entity_key

    raise InvalidParams(f"Raw metric {metric_name} does not exit")
Example #17
def get_series(projects: Sequence[Project], query: QueryDefinition) -> dict:
    """Get time series for the given query"""
    intervals = list(get_intervals(query))
    results = {}

    if not query.groupby:
        # When no groupBy columns are specified there is only a single group, so
        # ordering is meaningless; dropping the orderBy avoids running an extra
        # initial query just to determine the groups
        query.orderby = None

    if query.orderby is not None:
        # ToDo(ahmed): Now that we have conditional aggregates as select statements, we might be
        #  able to shave off a query here. We only need the extra queries for fields spanning
        #  other entities; if all the fields belong to one entity there is no need.
        # Known limitation: since we make two queries and use the results of the first query to
        # filter down the results of the second, ordering by a field that has no values for some
        # rows (for example some transactions in the performance table) can surface fewer
        # transactions than actually exist. We are limited by the rows available for the field
        # used in the orderBy.

        # Multi-field select with order by functionality. Currently only supports the
        # performance table.
        original_query_fields = copy(query.fields)

        # The initial query has to contain only one field which is the same as the order by
        # field
        orderby_field = [key for key, value in query.fields.items() if value == query.orderby[0]][0]
        query.fields = {orderby_field: parse_field(orderby_field)}

        snuba_queries = SnubaQueryBuilder(projects, query).get_snuba_queries()
        if len(snuba_queries) > 1:
            # Ordering by a field that spans multiple entities is currently not
            # supported, but that might change in the future. Even then, it might be
            # better handled on the snuba side of things
            raise InvalidParams(
                "Order by queries over multiple entities are not supported in "
                "multi-field select with order by clause queries"
            )

        try:
            # This query contains an order by clause, and so we are only interested in the
            # "totals" query
            initial_snuba_query = next(iter(snuba_queries.values()))["totals"]

            initial_query_results = raw_snql_query(
                initial_snuba_query, use_cache=False, referrer="api.metrics.totals.initial_query"
            )["data"]

        except StopIteration:
            # This can occur when the requested derived metrics have no data for the
            # passed projects
            initial_query_results = []

        # If we do not get any results from the first query, then there is no point in making
        # the second query
        if initial_query_results:
            # We no longer need the orderBy in the second query: the first query already
            # established the order of the groupBy tags. So we drop the orderBy columns and
            # reset the query fields to the original fields, because the second query should
            # fetch all the metrics requested in the api call
            query.orderby = None
            query.fields = original_query_fields

            snuba_queries = SnubaQueryBuilder(projects, query).get_snuba_queries()

            # Translate the groupby fields of the query into their tag keys because these fields
            # will be used to filter down and order the results of the 2nd query.
            # For example, (project_id, transaction) is translated to (project_id, tags[3])
            groupby_tags = tuple(
                resolve_tag_key(field) if field not in ALLOWED_GROUPBY_COLUMNS else field
                for field in query.groupby
            )

            # Dictionary that contains the conditions that are required to be added to the where
            # clause of the second query. In addition to filtering down on the tuple combination
            # of the fields in the group by columns, we need a separate condition for each of
            # the columns in the group by with their respective values so Clickhouse can
            # filter the results down before checking for the group by column combinations.
            ordered_tag_conditions = {
                col: list({data_elem[col] for data_elem in initial_query_results})
                for col in groupby_tags
            }
            ordered_tag_conditions[groupby_tags] = [
                tuple(data_elem[col] for col in groupby_tags) for data_elem in initial_query_results
            ]

            for entity, queries in snuba_queries.items():
                results.setdefault(entity, {})
                # This loop has constant time complexity as it will always have a maximum of
                # three queries corresponding to the three available entities
                # ["metrics_sets", "metrics_distributions", "metrics_counters"]
                for key, snuba_query in queries.items():
                    results[entity].setdefault(key, {"data": []})
                    # If the query is grouped by project_id, we should remove the original
                    # project_id condition because it might be more relaxed than the
                    # project_id condition in the second query
                    where = []
                    for condition in snuba_query.where:
                        if not (
                            isinstance(condition.lhs, Column)
                            and condition.lhs.name == "project_id"
                            and "project_id" in groupby_tags
                        ):
                            where += [condition]

                    # Adds the conditions obtained from the previous query
                    for condition_key, condition_value in ordered_tag_conditions.items():
                        if not condition_key or not condition_value:
                            # Safeguard to prevent adding empty conditions to the where clause
                            continue

                        lhs_condition = (
                            Function("tuple", [Column(col) for col in condition_key])
                            if isinstance(condition_key, tuple)
                            else Column(condition_key)
                        )
                        where += [
                            Condition(lhs_condition, Op.IN, Function("tuple", condition_value))
                        ]
                    snuba_query = snuba_query.set_where(where)

                    # Set the limit of the second query to be the provided limits multiplied by
                    # the number of the metrics requested in the query in this specific entity
                    snuba_query = snuba_query.set_limit(
                        snuba_query.limit.limit * len(snuba_query.select)
                    )
                    snuba_query = snuba_query.set_offset(0)

                    snuba_query_res = raw_snql_query(
                        snuba_query, use_cache=False, referrer=f"api.metrics.{key}.second_query"
                    )
                    # Create a dictionary that has keys representing the ordered by tuples from the
                    # initial query, so that we are able to order it easily in the next code block
                    # If, for example, we are grouping by (project_id, transaction), this
                    # logic will output a dictionary like the following, where `tags[1]`
                    # represents transaction
                    # {
                    #     (3, 2): [{"metric_id": 4, "project_id": 3, "tags[1]": 2, "p50": [11.0]}],
                    #     (3, 3): [{"metric_id": 4, "project_id": 3, "tags[1]": 3, "p50": [5.0]}],
                    # }
                    snuba_query_data_dict = {}
                    for data_elem in snuba_query_res["data"]:
                        snuba_query_data_dict.setdefault(
                            tuple(data_elem[col] for col in groupby_tags), []
                        ).append(data_elem)

                    # Order the results according to the results of the initial query, so that when
                    # the results dict is passed on to `SnubaResultsConverter`, it comes out ordered
                    # Ordered conditions might for example look something like this
                    # {..., ('project_id', 'tags[1]'): [(3, 3), (3, 2)]}, then we end up with
                    # {
                    #     "totals": {
                    #         "data": [
                    #             {
                    #               "metric_id": 5, "project_id": 3, "tags[1]": 3, "count_unique": 5
                    #             },
                    #             {
                    #               "metric_id": 5, "project_id": 3, "tags[1]": 2, "count_unique": 1
                    #             },
                    #         ]
                    #     }
                    # }
                    for group_tuple in ordered_tag_conditions[groupby_tags]:
                        results[entity][key]["data"] += snuba_query_data_dict.get(group_tuple, [])
    else:
        snuba_queries = SnubaQueryBuilder(projects, query).get_snuba_queries()
        for entity, queries in snuba_queries.items():
            results.setdefault(entity, {})
            for key, snuba_query in queries.items():
                if snuba_query is None:
                    continue
                results[entity][key] = raw_snql_query(
                    snuba_query, use_cache=False, referrer=f"api.metrics.{key}"
                )

    assert projects
    converter = SnubaResultConverter(projects[0].organization_id, query, intervals, results)

    return {
        "start": query.start,
        "end": query.end,
        "query": query.query,
        "intervals": intervals,
        "groups": converter.translate_results(),
    }
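A standalone sketch of how `ordered_tag_conditions` is built from the initial query's rows (made-up data, with `tags[1]` standing in for `transaction`); note that the tuple key preserves the order returned by the first query.

initial_query_results = [
    {"project_id": 3, "tags[1]": 3, "p50": 5.0},
    {"project_id": 3, "tags[1]": 2, "p50": 11.0},
]
groupby_tags = ("project_id", "tags[1]")

ordered_tag_conditions = {
    col: list({row[col] for row in initial_query_results}) for col in groupby_tags
}
ordered_tag_conditions[groupby_tags] = [
    tuple(row[col] for col in groupby_tags) for row in initial_query_results
]
assert ordered_tag_conditions[groupby_tags] == [(3, 3), (3, 2)]  # order preserved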
Example #18
    def _validate_metric_names(cls, metric_names):
        unknown_metric_names = set(metric_names) - _METRICS.keys()
        if unknown_metric_names:
            raise InvalidParams(f"Unknown metrics '{', '.join(unknown_metric_names)}'")

        return metric_names
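The check above is a plain set difference against the metric registry; a sketch with a made-up `_METRICS`:

_METRICS = {"session": {}, "user": {}}  # made-up registry
unknown = {"session", "sessions"} - _METRICS.keys()
assert unknown == {"sessions"}  # -> InvalidParams("Unknown metrics 'sessions'")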
Example #19
def _fetch_tags_or_values_per_ids(
    projects: Sequence[Project],
    metric_names: Optional[Sequence[str]],
    referrer: str,
    column: str,
) -> Tuple[Union[Sequence[Tag], Sequence[TagValue]], Optional[str]]:
    """
    Function that takes as input projects, metric_names, and a column, and based on the column
    selection, either returns tags or tag values for the combination of projects and metric_names
    selected or in the case of no metric_names passed, returns basically all the tags or the tag
    values available for those projects. In addition, when exactly one metric name is passed in
    metric_names, then the type (i.e. mapping to the entity) is also returned
    """
    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        raise InvalidParams(
            f"Some or all of the metric names in {metric_names} do not exist in the indexer"
        )
    else:
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_or_value_ids_per_metric_id = defaultdict(list)
    # This dictionary maps each entity to the metric ids available in it. It is needed to
    # validate that the constituent metrics of a SingularEntityDerivedMetric actually span
    # a single entity, i.e. that their ids all lie in the same entity
    supported_metric_ids_in_entities = {}

    for metric_type in ("counter", "set", "distribution"):

        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column(column)],
            where=where,
            groupby=[Column("metric_id"), Column(column)],
            referrer=referrer,
            projects=projects,
            org_id=projects[0].organization_id,
        )

        for row in rows:
            metric_id = row["metric_id"]
            if column.startswith("tags["):
                value_id = row[column]
                if value_id > 0:
                    tag_or_value_ids_per_metric_id[metric_id].append(value_id)
            else:
                tag_or_value_ids_per_metric_id[metric_id].extend(row[column])
            supported_metric_ids_in_entities.setdefault(metric_type, []).append(row["metric_id"])

    # If we get no results back from snuba, raise an InvalidParams with an appropriate
    # error message
    if not tag_or_value_ids_per_metric_id:
        if metric_names:
            error_str = f"The following metrics {metric_names} do not exist in the dataset"
        else:
            error_str = "Dataset contains no metric data for your project selection"
        raise InvalidParams(error_str)

    tag_or_value_id_lists = tag_or_value_ids_per_metric_id.values()
    if metric_names:
        # If any metric_ids that map to the provided metric_names were not found in the
        # dataset, raise an InvalidParams exception
        if metric_ids != set(tag_or_value_ids_per_metric_id.keys()):
            # This can occur for metric names that don't have an equivalent in the dataset.
            raise InvalidParams(
                f"Not all the requested metrics or the constituent metrics in {metric_names} have "
                f"data in the dataset"
            )

        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset. Now we need to check that all requested derived metrics
        # (if any) are set up correctly, i.e. that the constituents of a
        # SingularEntityDerivedMetric actually span a single entity
        _validate_requested_derived_metrics_in_input_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )

        # Only return tags/tag values that occur in all metrics
        tag_or_value_ids = set.intersection(*map(set, tag_or_value_id_lists))
    else:
        tag_or_value_ids = {tag_id for ids in tag_or_value_id_lists for tag_id in ids}

    if column.startswith("tags["):
        tag_id = column.split("tags[")[1].split("]")[0]
        tags_or_values = [
            {"key": reverse_resolve(int(tag_id)), "value": reverse_resolve(value_id)}
            for value_id in tag_or_value_ids
        ]
        tags_or_values.sort(key=lambda tag: (tag["key"], tag["value"]))
    else:
        tags_or_values = [{"key": reverse_resolve(tag_id)} for tag_id in tag_or_value_ids]
        tags_or_values.sort(key=itemgetter("key"))

    if metric_names and len(metric_names) == 1:
        metric_type = list(supported_metric_ids_in_entities.keys())[0]
        return tags_or_values, metric_type
    return tags_or_values, None
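The "tags common to all requested metrics" step above is a plain set intersection; a sketch with made-up ids:

tag_ids_per_metric = {10: [1, 2, 3], 11: [2, 3, 4]}
common = set.intersection(*map(set, tag_ids_per_metric.values()))
assert common == {2, 3}  # only tags present for every metric survive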
Example #20
def resolve_tag_key(string: str) -> str:
    resolved = indexer.resolve(string)
    if resolved is None:
        raise InvalidParams(f"Unknown tag key: '{string}'")

    return f"tags[{resolved}]"