Example 1
def get_single_metric_info(projects: Sequence[Project],
                           metric_name: str) -> MetricMetaWithTagKeys:
    assert projects

    metric_id = indexer.resolve(metric_name)

    if metric_id is None:
        raise InvalidParams

    for metric_type in ("counter", "set", "distribution"):
        # TODO: What if metric_id exists for multiple types / units?
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column("tags.key")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id"), Column("tags.key")],
            referrer="snuba.metrics.meta.get_single_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            tag_ids = {tag_id for row in data for tag_id in row["tags.key"]}
            return {
                "name": metric_name,
                "type": metric_type,
                "operations": AVAILABLE_OPERATIONS[entity_key.value],
                "tags": sorted(
                    ({"key": reverse_resolve(tag_id)} for tag_id in tag_ids),
                    key=itemgetter("key"),
                ),
                "unit": None,
            }

    raise InvalidParams(f"Raw metric {metric_name} does not exit")
Example 2
def get_metrics(projects: Sequence[Project]) -> Sequence[MetricMeta]:
    assert projects

    metrics_meta = []
    metric_ids_in_entities = {}

    for metric_type in ("counter", "set", "distribution"):
        metric_ids_in_entities.setdefault(metric_type, set())
        for row in _get_metrics_for_entity(
            entity_key=METRIC_TYPE_TO_ENTITY[metric_type],
            projects=projects,
            org_id=projects[0].organization_id,
        ):
            metrics_meta.append(
                MetricMeta(
                    name=reverse_resolve(row["metric_id"]),
                    type=metric_type,
                    operations=AVAILABLE_OPERATIONS[METRIC_TYPE_TO_ENTITY[metric_type].value],
                    unit=None,  # snuba does not know the unit
                )
            )
            metric_ids_in_entities[metric_type].add(row["metric_id"])

    # In the previous loop, we found all available metric ids per entity with respect to the
    # projects filter. To figure out which derived metrics are supported for these projects,
    # we iterate over the list of derived metrics and generate the ids of their constituent
    # metrics. A derived metric is added to the response list if its metric ids are a subset
    # of the metric ids in one of the entities, i.e. it is an instance of
    # SingularEntityDerivedMetric.
    # ToDo(ahmed): When CompositeEntityDerivedMetrics are introduced, we need to perform these
    #  checks not on the CompositeEntityDerivedMetric instance itself but on its
    #  SingularEntityDerivedMetric constituents.
    found_derived_metrics = get_available_derived_metrics(metric_ids_in_entities)
    for derived_metric_name in found_derived_metrics:
        derived_metric_obj = DERIVED_METRICS[derived_metric_name]
        metrics_meta.append(
            MetricMeta(
                name=derived_metric_obj.metric_name,
                type=derived_metric_obj.result_type,
                operations=derived_metric_obj.generate_available_operations(),
                unit=derived_metric_obj.unit,
            )
        )
    return sorted(metrics_meta, key=itemgetter("name"))
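The subset check described in the comment above can be sketched in isolation. This is a simplified stand-in for what get_available_derived_metrics is described as doing, not its actual implementation:

# Simplified sketch of the subset check (illustrative, not the real helper):
def _is_supported(constituent_ids: set, metric_ids_in_entities: dict) -> bool:
    # A SingularEntityDerivedMetric is available only if all of its
    # constituent metric ids lie within a single entity's id set.
    return any(constituent_ids <= ids for ids in metric_ids_in_entities.values())

# _is_supported({1, 2}, {"counter": {1, 2, 3}, "set": {4}})  -> True
# _is_supported({1, 4}, {"counter": {1, 2, 3}, "set": {4}})  -> False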
Example 3
    def translate_sessions_tag_keys_and_values(
        data: List[Dict[str, Any]], org_id: int, alias: Optional[str] = None
    ) -> Tuple[int, int]:
        value_col_name = alias if alias else "value"
        try:
            translated_data: Dict[str, Any] = {}
            session_status = resolve_tag_key("session.status")
            for row in data:
                tag_value = reverse_resolve(row[session_status])
                translated_data[tag_value] = row[value_col_name]

            total_session_count = translated_data.get("init", 0)
            crash_count = translated_data.get("crashed", 0)
        except MetricIndexNotFound:
            metrics.incr("incidents.entity_subscription.metric_index_not_found")
            total_session_count = crash_count = 0
        return total_session_count, crash_count
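For intuition, the helper effectively performs the following translation; all ids and counts below are made up, and the resolve_tag_key/reverse_resolve results are assumed:

# Illustrative walk-through (ids are made up):
# resolve_tag_key("session.status")  -> "tags[9]"
# data = [
#     {"tags[9]": 101, "value": 1000},  # 101 reverse-resolves to "init"
#     {"tags[9]": 102, "value": 7},     # 102 reverse-resolves to "crashed"
# ]
# translate_sessions_tag_keys_and_values(data, org_id)  -> (1000, 7)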
Example 4
def run_sessions_query(
    org_id: int,
    query: QueryDefinition,
    span_op: str,
) -> SessionsQueryResult:
    """Convert a QueryDefinition to multiple snuba queries and reformat the results"""
    # This is necessary so that we do not mutate the query object shared between different
    # backend runs
    query_clone = deepcopy(query)

    data, metric_to_output_field = _fetch_data(org_id, query_clone)

    data_points = _flatten_data(org_id, data)

    intervals = list(get_intervals(query_clone))
    timestamp_index = {
        timestamp.isoformat(): index
        for index, timestamp in enumerate(intervals)
    }

    def default_for(field: SessionsQueryFunction) -> SessionsQueryValue:
        return 0 if field in ("sum(session)", "count_unique(user)") else None

    GroupKey = Tuple[Tuple[GroupByFieldName, Union[str, int]], ...]

    class Group(TypedDict):
        series: MutableMapping[SessionsQueryFunction, List[SessionsQueryValue]]
        totals: MutableMapping[SessionsQueryFunction, SessionsQueryValue]

    groups: MutableMapping[GroupKey, Group] = defaultdict(
        lambda: {
            "totals": {field: default_for(field) for field in query_clone.raw_fields},
            "series": {
                field: len(intervals) * [default_for(field)]
                for field in query_clone.raw_fields
            },
        }
    )

    if len(data_points) == 0:
        # We're only interested in `session.status` group-bys. The rest of the
        # conditions require work (e.g. getting all environments) that we can't
        # do without querying the DB, including group-bys consisting of
        # multiple parameters (even if `session.status` is one of them).
        if query_clone.raw_groupby == ["session.status"]:
            for status in get_args(_SessionStatus):
                gkey: GroupKey = (("session.status", status), )
                groups[gkey]
    else:
        for key in data_points.keys():
            try:
                output_field = metric_to_output_field[key.metric_key, key.column]
            except KeyError:
                continue  # secondary metric, like session.error

            by: MutableMapping[GroupByFieldName, Union[str, int]] = {}
            if key.release is not None:
                # Every session has a release, so this should not throw
                by["release"] = reverse_resolve(key.release)
            if key.environment is not None:
                # To match behavior of the old sessions backend, session data
                # without environment is grouped under the empty string.
                by["environment"] = reverse_resolve_weak(key.environment) or ""
            if key.project_id is not None:
                by["project"] = key.project_id

            for status_value in output_field.get_values(data_points, key):
                if status_value.session_status is not None:
                    by["session.status"] = status_value.session_status  # !

                group_key: GroupKey = tuple(sorted(by.items()))
                group: Group = groups[group_key]

                value = status_value.value
                if value is not None:
                    value = finite_or_none(value)

                if key.bucketed_time is None:
                    group["totals"][output_field.get_name()] = value
                else:
                    index = timestamp_index[key.bucketed_time]
                    group["series"][output_field.get_name()][index] = value

    groups_as_list: List[SessionsQueryGroup] = [
        {"by": dict(by), "totals": group["totals"], "series": group["series"]}
        for by, group in groups.items()
    ]

    def format_datetime(dt: datetime) -> str:
        return dt.isoformat().replace("+00:00", "Z")

    return {
        "start": format_datetime(query_clone.start),
        "end": format_datetime(query_clone.end),
        "query": query_clone.query,
        "intervals": [format_datetime(dt) for dt in intervals],
        "groups": groups_as_list,
    }
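A sketch of the returned payload's shape; timestamps, fields, and values are illustrative:

# Example result shape (values illustrative):
# {
#     "start": "2022-01-01T00:00:00Z",
#     "end": "2022-01-02T00:00:00Z",
#     "query": "",
#     "intervals": ["2022-01-01T00:00:00Z", "2022-01-01T01:00:00Z"],
#     "groups": [
#         {
#             "by": {"session.status": "init"},
#             "totals": {"sum(session)": 100},
#             "series": {"sum(session)": [40, 60]},
#         },
#     ],
# }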
Example 5
def _fetch_tags_or_values_per_ids(
    projects: Sequence[Project],
    metric_names: Optional[Sequence[str]],
    referrer: str,
    column: str,
) -> Tuple[Union[Sequence[Tag], Sequence[TagValue]], Optional[str]]:
    """
    Function that takes as input projects, metric_names, and a column, and based on the column
    selection, either returns tags or tag values for the combination of projects and metric_names
    selected or in the case of no metric_names passed, returns basically all the tags or the tag
    values available for those projects. In addition, when exactly one metric name is passed in
    metric_names, then the type (i.e. mapping to the entity) is also returned
    """
    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        raise InvalidParams(
            f"Some or all of the metric names in {metric_names} do not exist in the indexer"
        )
    else:
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_or_value_ids_per_metric_id = defaultdict(list)
    # This dictionary is required as a mapping from an entity to the ids available in it, in
    # order to validate that the constituent metrics of a SingularEntityDerivedMetric actually
    # span a single entity by checking that their ids all lie in the same entity
    supported_metric_ids_in_entities = {}

    for metric_type in ("counter", "set", "distribution"):
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column(column)],
            where=where,
            groupby=[Column("metric_id"), Column(column)],
            referrer=referrer,
            projects=projects,
            org_id=projects[0].organization_id,
        )

        for row in rows:
            metric_id = row["metric_id"]
            if column.startswith("tags["):
                value_id = row[column]
                if value_id > 0:
                    tag_or_value_ids_per_metric_id[metric_id].append(value_id)
            else:
                tag_or_value_ids_per_metric_id[metric_id].extend(row[column])
            supported_metric_ids_in_entities.setdefault(metric_type, []).append(row["metric_id"])

    # If we get no results back from snuba, then raise an InvalidParams with an appropriate
    # error message
    if not tag_or_value_ids_per_metric_id:
        if metric_names:
            error_str = f"The following metrics {metric_names} do not exist in the dataset"
        else:
            error_str = "Dataset contains no metric data for your project selection"
        raise InvalidParams(error_str)

    tag_or_value_id_lists = tag_or_value_ids_per_metric_id.values()
    if metric_names:
        # If there are metric_ids that map to the provided metric_names but were not found in
        # the dataset, then we raise an InvalidParams exception
        if metric_ids != set(tag_or_value_ids_per_metric_id.keys()):
            # This can occur for metric names that don't have an equivalent in the dataset.
            raise InvalidParams(
                f"Not all the requested metrics or the constituent metrics in {metric_names} have "
                f"data in the dataset"
            )

        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset, and now we need to check that all requested derived metrics
        # (if any) are set up correctly, i.e. the constituents of a SingularEntityDerivedMetric
        # actually span a single entity
        _validate_requested_derived_metrics_in_input_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )

        # Only return tags/tag values that occur in all metrics
        tag_or_value_ids = set.intersection(*map(set, tag_or_value_id_lists))
    else:
        tag_or_value_ids = {tag_id for ids in tag_or_value_id_lists for tag_id in ids}

    if column.startswith("tags["):
        tag_id = column.split("tags[")[1].split("]")[0]
        tags_or_values = [
            {"key": reverse_resolve(int(tag_id)), "value": reverse_resolve(value_id)}
            for value_id in tag_or_value_ids
        ]
        tags_or_values.sort(key=lambda tag: (tag["key"], tag["value"]))
    else:
        tags_or_values = [{"key": reverse_resolve(tag_id)} for tag_id in tag_or_value_ids]
        tags_or_values.sort(key=itemgetter("key"))

    if metric_names and len(metric_names) == 1:
        metric_type = list(supported_metric_ids_in_entities.keys())[0]
        return tags_or_values, metric_type
    return tags_or_values, None
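The "occur in all metrics" intersection near the end can be illustrated in isolation; the metric and tag ids below are made up:

# Tag ids collected per metric id (ids are made up):
# tag_or_value_ids_per_metric_id = {10: [1, 2, 3], 11: [2, 3, 4]}
# With metric_names given, only tags shared by every requested metric survive:
# set.intersection(*map(set, [[1, 2, 3], [2, 3, 4]]))  == {2, 3}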
Example 6
def get_tag_values(
    projects: Sequence[Project], tag_name: str, metric_names: Optional[Sequence[str]]
) -> Sequence[TagValue]:
    """Get all known values for a specific tag"""
    assert projects

    tag_id = indexer.resolve(tag_name)
    if tag_id is None:
        raise InvalidParams

    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        return []
    else:
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_values = defaultdict(list)
    # This dictionary is required as a mapping from an entity to the ids available in it, in
    # order to validate that the constituent metrics of a SingularEntityDerivedMetric actually
    # span a single entity by checking that their ids all lie in the same entity
    supported_metric_ids_in_entities = {}

    column_name = f"tags[{tag_id}]"
    for metric_type in ("counter", "set", "distribution"):
        supported_metric_ids_in_entities.setdefault(metric_type, [])

        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column(column_name)],
            where=where,
            groupby=[Column("metric_id"), Column(column_name)],
            referrer="snuba.metrics.meta.get_tag_values",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        for row in rows:
            value_id = row[column_name]
            supported_metric_ids_in_entities[metric_type].append(row["metric_id"])
            if value_id > 0:
                metric_id = row["metric_id"]
                tag_values[metric_id].append(value_id)

        # If we are trying to find the tag values for only one metric name, then there is no
        # need to query other entities once we find data for that metric_name in one of them
        if metric_names and len(metric_names) == 1 and rows:
            break

    value_id_lists = tag_values.values()
    if metric_names is not None:
        if metric_ids != set(tag_values.keys()):
            return []
        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset, and now we need to check that all derived metrics requested are
        # setup correctly
        _validate_requested_derived_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )
        # Only return tags that occur in all metrics
        value_ids = set.intersection(*[set(ids) for ids in value_id_lists])
    else:
        value_ids = {value_id for ids in value_id_lists for value_id in ids}

    tags = [
        {"key": tag_name, "value": reverse_resolve(value_id)} for value_id in value_ids
    ]
    tags.sort(key=lambda tag: (tag["key"], tag["value"]))

    return tags
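A hedged usage sketch; the tag name, metric name, and returned values are illustrative:

# Hypothetical call: all values of the "release" tag for one metric.
# values = get_tag_values(projects, "release", ["sentry.sessions.session"])
# values == [{"key": "release", "value": "backend@1.0"}, ...]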
Example 7
def get_tags(
    projects: Sequence[Project], metric_names: Optional[Sequence[str]]
) -> Sequence[Tag]:
    """Get all metric tags for the given projects and metric_names"""
    assert projects

    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        return []
    else:
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_ids_per_metric_id = defaultdict(list)
    # This dictionary is required as a mapping from an entity to the ids available in it, in
    # order to validate that the constituent metrics of a SingularEntityDerivedMetric actually
    # span a single entity by checking that their ids all lie in the same entity
    supported_metric_ids_in_entities = {}

    for metric_type in ("counter", "set", "distribution"):
        supported_metric_ids_in_entities.setdefault(metric_type, [])

        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column("tags.key")],
            where=where,
            groupby=[Column("metric_id"), Column("tags.key")],
            referrer="snuba.metrics.meta.get_tags",
            projects=projects,
            org_id=projects[0].organization_id,
        )

        for row in rows:
            tag_ids_per_metric_id[row["metric_id"]].extend(row["tags.key"])
            supported_metric_ids_in_entities[metric_type].append(row["metric_id"])

        # If we are trying to find the tags for only one metric name, then there is no need
        # to query other entities once we find data for that metric_name in one of them
        if metric_names and len(metric_names) == 1 and rows:
            break

    # If we get no results back from snuba, then just return an empty list
    if not tag_ids_per_metric_id:
        return []

    tag_id_lists = tag_ids_per_metric_id.values()
    if metric_names:
        # If there are metric_ids that were not found in the dataset, then just return []
        if metric_ids != set(tag_ids_per_metric_id.keys()):
            # This can occur for metric names that don't have an equivalent in the dataset.
            return []

        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset, and now we need to check that all derived metrics requested are
        # setup correctly
        _validate_requested_derived_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )

        # Only return tags that occur in all metrics
        tag_ids = set.intersection(*map(set, tag_id_lists))
    else:
        tag_ids = {tag_id for ids in tag_id_lists for tag_id in ids}

    tags = [{"key": reverse_resolve(tag_id)} for tag_id in tag_ids]
    tags.sort(key=itemgetter("key"))

    return tags
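Usage sketch; the metric names are illustrative, and with metric_names=None the function returns every tag in the dataset for the selected projects:

# Hypothetical call: tags common to every listed metric.
# tags = get_tags(projects, ["sentry.sessions.session", "sentry.sessions.user"])
# tags == [{"key": "environment"}, {"key": "release"}]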
Example 8
    def _parse_tag(self, tag_string: str) -> str:
        # Strip the "tags[...]" wrapper to get the numeric indexer id, then
        # reverse-resolve it back to the original tag key string.
        tag_key = int(tag_string.replace("tags[", "").replace("]", ""))
        return reverse_resolve(tag_key)
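An illustrative round-trip; the id is made up:

# self._parse_tag("tags[42]") extracts 42 and reverse-resolves it, yielding
# the original tag key, e.g. "environment" (assuming 42 was indexed from it).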