Ejemplo n.º 1
0
    def translate_results(self):
        """Flatten the raw per-entity results into a list of group dicts.

        Every row under ``totals`` (and under ``series`` when that section is
        present) of each entry in ``self._results`` is handed to
        ``self._extract_data`` together with one shared accumulator dict.
        Each accumulator entry, whose key is iterated as ``(key, value)`` tag
        pairs, then becomes one output dict made of a ``by`` mapping plus the
        fields accumulated for that entry.

        Tag keys found in ``ALLOWED_GROUPBY_COLUMNS`` are copied through
        unchanged; every other pair is converted with ``self._parse_tag``
        (key) and ``reverse_resolve_weak`` (value).
        """

        def iter_rows(subresults):
            # Lazily yield the totals rows first, then the series rows when
            # the optional "series" section exists.
            yield from subresults["totals"]["data"]
            if "series" in subresults:
                yield from subresults["series"]["data"]

        accumulator = {}
        for subresults in self._results.values():
            for row in iter_rows(subresults):
                self._extract_data(row, accumulator)

        translated = []
        for tag_pairs, fields in accumulator.items():
            by = {}
            for tag_key, tag_value in tag_pairs:
                if tag_key in ALLOWED_GROUPBY_COLUMNS:
                    by[tag_key] = tag_value
                else:
                    # Resolve the tag name before the tag value.
                    resolved_name = self._parse_tag(tag_key)
                    by[resolved_name] = reverse_resolve_weak(tag_value)
            translated.append(dict(by=by, **fields))
        return translated
Ejemplo n.º 2
0
    def translate_results(self):
        """Build the response groups and apply post-query processing.

        Steps:

        1. Pass every ``totals`` row (and every ``series`` row, when that
           section is present) of each entry in ``self._results`` to
           ``self._extract_data``, accumulating into one shared dict.
        2. Turn each accumulated entry into a dict holding a ``by`` mapping
           plus the accumulated fields. Tag keys in
           ``ALLOWED_GROUPBY_COLUMNS`` are kept as-is; all others go through
           ``self._parse_tag`` / ``reverse_resolve_weak``.
        3. For every ``(op, metric_name)`` in
           ``self._bottom_up_dependency_tree``, compute the totals value and
           one series value per interval with ``run_post_query_function``.
        4. Remove every totals/series key whose ``(operation, metric_name)``
           pair is not in ``self._query_definition_fields_set``.

        Returns:
            A list of group dicts, each containing at least ``by``,
            ``totals`` and ``series``.
        """
        groups = {}
        for subresults in self._results.values():
            for data in subresults["totals"]["data"]:
                self._extract_data(data, groups)

            if "series" in subresults:
                for data in subresults["series"]["data"]:
                    self._extract_data(data, groups)

        groups = [
            dict(
                by=dict(
                    (self._parse_tag(key), reverse_resolve_weak(value))
                    if key not in ALLOWED_GROUPBY_COLUMNS
                    else (key, value)
                    for key, value in tags
                ),
                **data,
            )
            for tags, data in groups.items()
        ]

        # Applying post query operations for totals and series
        interval_count = len(self._intervals)
        for group in groups:
            totals, series = group["totals"], group["series"]
            for op, metric_name in self._bottom_up_dependency_tree:
                metric_obj = metric_object_factory(op=op, metric_name=metric_name)
                # Totals
                totals[metric_name] = metric_obj.run_post_query_function(totals)
                # Series
                # Seed the per-interval list once instead of rebuilding a
                # throwaway default list on every interval iteration. As
                # before, nothing is seeded when there are no intervals.
                if interval_count and metric_name not in series:
                    series[metric_name] = [
                        metric_obj.generate_default_null_values()
                    ] * interval_count
                for idx in range(interval_count):
                    series[metric_name][idx] = metric_obj.run_post_query_function(
                        series, idx
                    )

        # Remove the extra fields added due to the constituent metrics that were added
        # from the generated dependency tree. These metrics that are to be removed were added to
        # be able to generate fields that require further processing post query, but are not
        # required nor expected in the response
        for group in groups:
            totals, series = group["totals"], group["series"]
            # Snapshot the keys: entries are deleted while iterating. The keys
            # are only read, so a shallow list is enough.
            for key in list(totals):
                matches = FIELD_REGEX.match(key)
                if matches:
                    operation = matches[1]
                    metric_name = matches[2]
                else:
                    operation = None
                    metric_name = key
                if (operation, metric_name) not in self._query_definition_fields_set:
                    del totals[key]
                    # A key can exist in totals without a series counterpart
                    # (e.g. a derived metric when there are no intervals), so
                    # tolerate its absence instead of raising KeyError.
                    series.pop(key, None)

        return groups
Ejemplo n.º 3
0
def run_sessions_query(
    org_id: int,
    query: QueryDefinition,
    span_op: str,
) -> SessionsQueryResult:
    """Convert a QueryDefinition to multiple snuba queries and reformat the results.

    Args:
        org_id: Organization id, forwarded to ``_fetch_data`` and
            ``_flatten_data``.
        query: The query definition. It is deep-copied up front, so the
            caller's object is never mutated by this function.
        span_op: Not referenced in this function body; kept so existing
            callers keep working.

    Returns:
        A dict with the keys ``start``, ``end``, ``query``, ``intervals`` and
        ``groups``. Each group holds its ``by`` mapping together with
        ``totals`` (one value per field) and ``series`` (one list per field,
        one slot per interval).
    """
    # This is necessary so that we do not mutate the query object shared between different
    # backend runs
    query_clone = deepcopy(query)

    data, metric_to_output_field = _fetch_data(org_id, query_clone)

    data_points = _flatten_data(org_id, data)

    intervals = list(get_intervals(query_clone))
    # Maps an ISO-formatted interval start to its position in the series lists.
    timestamp_index = {
        timestamp.isoformat(): index for index, timestamp in enumerate(intervals)
    }

    def default_for(field: SessionsQueryFunction) -> SessionsQueryValue:
        return 0 if field in ("sum(session)", "count_unique(user)") else None

    GroupKey = Tuple[Tuple[GroupByFieldName, Union[str, int]], ...]

    class Group(TypedDict):
        series: MutableMapping[SessionsQueryFunction, List[SessionsQueryValue]]
        totals: MutableMapping[SessionsQueryFunction, SessionsQueryValue]

    # Any group key that is looked up gets a fresh, fully defaulted group.
    groups: MutableMapping[GroupKey, Group] = defaultdict(
        lambda: {
            "totals": {field: default_for(field) for field in query_clone.raw_fields},
            "series": {
                field: len(intervals) * [default_for(field)]
                for field in query_clone.raw_fields
            },
        }
    )

    if not data_points:
        # We're only interested in `session.status` group-byes. The rest of the
        # conditions require work (e.g. getting all environments) that we can't
        # get without querying the DB, including group-byes consisting of
        # multiple parameters (even if `session.status` is one of them).
        if query_clone.raw_groupby == ["session.status"]:
            for status in get_args(_SessionStatus):
                gkey: GroupKey = (("session.status", status),)
                # Bare lookup on purpose: the defaultdict creates the
                # defaulted group for this status as a side effect.
                groups[gkey]
    else:
        for key in data_points:
            try:
                output_field = metric_to_output_field[key.metric_key, key.column]
            except KeyError:
                continue  # secondary metric, like session.error

            by: MutableMapping[GroupByFieldName, Union[str, int]] = {}
            if key.release is not None:
                # Every session has a release, so this should not throw
                by["release"] = reverse_resolve(key.release)
            if key.environment is not None:
                # To match behavior of the old sessions backend, session data
                # without environment is grouped under the empty string.
                by["environment"] = reverse_resolve_weak(key.environment) or ""
            if key.project_id is not None:
                by["project"] = key.project_id

            for status_value in output_field.get_values(data_points, key):
                # Work on a per-value copy: writing the status into the shared
                # `by` dict would leak it into the group key of any later
                # value whose session_status is None.
                group_by = dict(by)
                if status_value.session_status is not None:
                    group_by["session.status"] = status_value.session_status

                group_key: GroupKey = tuple(sorted(group_by.items()))
                group: Group = groups[group_key]

                value = status_value.value
                if value is not None:
                    value = finite_or_none(value)

                # No bucketed time means the value goes to totals; otherwise
                # it fills the series slot of the matching interval.
                if key.bucketed_time is None:
                    group["totals"][output_field.get_name()] = value
                else:
                    index = timestamp_index[key.bucketed_time]
                    group["series"][output_field.get_name()][index] = value

    groups_as_list: List[SessionsQueryGroup] = [
        {
            "by": dict(group_key),
            "totals": group["totals"],
            "series": group["series"],
        }
        for group_key, group in groups.items()
    ]

    def format_datetime(dt: datetime) -> str:
        return dt.isoformat().replace("+00:00", "Z")

    return {
        "start": format_datetime(query_clone.start),
        "end": format_datetime(query_clone.end),
        "query": query_clone.query,
        "intervals": [format_datetime(dt) for dt in intervals],
        "groups": groups_as_list,
    }