def translate_results(self):
    """Flatten the raw per-entity results into a list of group dicts.

    Every row under ``["totals"]["data"]`` and, when the entity has a
    ``"series"`` entry, every row under ``["series"]["data"]`` is handed to
    ``self._extract_data`` together with a shared accumulator keyed by tag
    tuples.

    Returns:
        A list with one dict per accumulated group. Each dict carries a
        ``by`` mapping built from the group's ``(key, value)`` tag pairs,
        merged with the data accumulated for that group. Tag keys listed in
        ``ALLOWED_GROUPBY_COLUMNS`` are copied through unchanged; any other
        key goes through ``self._parse_tag`` and its value through
        ``reverse_resolve_weak``.
    """
    accumulated = {}
    for subresults in self._results.values():
        for row in subresults["totals"]["data"]:
            self._extract_data(row, accumulated)
        if "series" in subresults:
            for row in subresults["series"]["data"]:
                self._extract_data(row, accumulated)

    translated = []
    for tags, data in accumulated.items():
        by = {}
        for key, value in tags:
            if key in ALLOWED_GROUPBY_COLUMNS:
                by[key] = value
            else:
                # Resolve the tag name first, then the value, in that order.
                tag_name = self._parse_tag(key)
                by[tag_name] = reverse_resolve_weak(value)
        translated.append(dict(by=by, **data))
    return translated
def translate_results(self):
    """Translate raw results into response groups and apply post-query metrics.

    The method works in three phases:

    1. Every totals row and, when present, every series row of each entry in
       ``self._results`` is passed to ``self._extract_data``, which
       accumulates them per tag tuple. Each accumulated entry becomes a dict
       with a ``by`` mapping merged with the accumulated data. Tag keys in
       ``ALLOWED_GROUPBY_COLUMNS`` are copied through unchanged; other keys
       go through ``self._parse_tag`` and their values through
       ``reverse_resolve_weak``.
    2. For every ``(op, metric_name)`` in ``self._bottom_up_dependency_tree``
       a metric object is created with ``metric_object_factory`` and its
       ``run_post_query_function`` result is stored in the group's totals
       and in each interval slot of its series.
    3. Keys whose ``(operation, metric_name)`` pair is not in
       ``self._query_definition_fields_set`` are removed from both totals
       and series.

    Returns:
        The list of group dicts. Each group is expected to carry ``totals``
        and ``series`` entries, which are read in phases 2 and 3.
    """
    accumulated = {}
    for subresults in self._results.values():
        for data in subresults["totals"]["data"]:
            self._extract_data(data, accumulated)
        if "series" in subresults:
            for data in subresults["series"]["data"]:
                self._extract_data(data, accumulated)

    groups = []
    for tags, data in accumulated.items():
        by = {}
        for key, value in tags:
            if key in ALLOWED_GROUPBY_COLUMNS:
                by[key] = value
            else:
                tag_name = self._parse_tag(key)
                by[tag_name] = reverse_resolve_weak(value)
        groups.append(dict(by=by, **data))

    interval_count = len(self._intervals)

    # Applying post query operations for totals and series
    for group in groups:
        totals, series = group["totals"], group["series"]
        for op, metric_name in self._bottom_up_dependency_tree:
            metric_obj = metric_object_factory(op=op, metric_name=metric_name)
            # Totals
            totals[metric_name] = metric_obj.run_post_query_function(totals)
            # Series: seed the slot list once per metric instead of once per
            # interval. Seeding outside the interval loop also keeps the keys
            # of `series` in step with `totals` when there are no intervals,
            # so the cleanup pass below can delete from both.
            series.setdefault(
                metric_name,
                [metric_obj.generate_default_null_values()] * interval_count,
            )
            for idx in range(interval_count):
                series[metric_name][idx] = metric_obj.run_post_query_function(series, idx)

    # Remove the extra fields added due to the constituent metrics that were added
    # from the generated dependency tree. These metrics that are to be removed were added to
    # be able to generate fields that require further processing post query, but are not
    # required nor expected in the response
    for group in groups:
        totals, series = group["totals"], group["series"]
        # Iterate over a snapshot of the keys because entries are deleted
        # while walking them; a shallow list is sufficient for that.
        for key in list(totals):
            matches = FIELD_REGEX.match(key)
            if matches:
                operation, metric_name = matches[1], matches[2]
            else:
                operation, metric_name = None, key
            if (operation, metric_name) not in self._query_definition_fields_set:
                del totals[key], series[key]
    return groups
def run_sessions_query(
    org_id: int,
    query: QueryDefinition,
    span_op: str,
) -> SessionsQueryResult:
    """Convert a QueryDefinition to multiple snuba queries and reformat the results.

    Args:
        org_id: Organization id, forwarded to ``_fetch_data`` and
            ``_flatten_data``.
        query: The query to run. It is deep-copied first and only the copy is
            used, so the caller's object is not mutated by this function.
        span_op: Not read anywhere in this function body.

    Returns:
        A dict with the keys ``start``, ``end``, ``query``, ``intervals`` and
        ``groups``. Datetimes are rendered with ``isoformat()`` and a trailing
        ``+00:00`` replaced by ``Z``. Each group holds its ``by`` mapping, its
        ``totals`` and its per-interval ``series``.
    """
    # This is necessary so that we do not mutate the query object shared between different
    # backend runs
    query_clone = deepcopy(query)

    data, metric_to_output_field = _fetch_data(org_id, query_clone)
    data_points = _flatten_data(org_id, data)

    intervals = list(get_intervals(query_clone))
    # Maps the ISO string of each interval start to its position in `intervals`.
    timestamp_index = {
        timestamp.isoformat(): index for index, timestamp in enumerate(intervals)
    }

    def default_for(field: SessionsQueryFunction) -> SessionsQueryValue:
        return 0 if field in ("sum(session)", "count_unique(user)") else None

    GroupKey = Tuple[Tuple[GroupByFieldName, Union[str, int]], ...]

    class Group(TypedDict):
        series: MutableMapping[SessionsQueryFunction, List[SessionsQueryValue]]
        totals: MutableMapping[SessionsQueryFunction, SessionsQueryValue]

    def new_group() -> Group:
        # Every requested field starts out at its default, for the totals and
        # for each interval of the series.
        return {
            "totals": {field: default_for(field) for field in query_clone.raw_fields},
            "series": {
                field: len(intervals) * [default_for(field)]
                for field in query_clone.raw_fields
            },
        }

    groups: MutableMapping[GroupKey, Group] = defaultdict(new_group)

    if len(data_points) == 0:
        # We're only interested in `session.status` group-byes. The rest of the
        # conditions require work (e.g. getting all environments) that we can't
        # get without querying the DB, including group-byes consisting of
        # multiple parameters (even if `session.status` is one of them).
        if query_clone.raw_groupby == ["session.status"]:
            for status in get_args(_SessionStatus):
                gkey: GroupKey = (("session.status", status),)
                # Indexing the defaultdict is what creates the default-filled
                # group; the returned value is intentionally unused.
                groups[gkey]
    else:
        for key in data_points.keys():
            try:
                output_field = metric_to_output_field[key.metric_key, key.column]
            except KeyError:
                continue  # secondary metric, like session.error

            by: MutableMapping[GroupByFieldName, Union[str, int]] = {}
            if key.release is not None:
                # Every session has a release, so this should not throw
                by["release"] = reverse_resolve(key.release)
            if key.environment is not None:
                # To match behavior of the old sessions backend, session data
                # without environment is grouped under the empty string.
                by["environment"] = reverse_resolve_weak(key.environment) or ""
            if key.project_id is not None:
                by["project"] = key.project_id

            for status_value in output_field.get_values(data_points, key):
                # Build the group key from a per-value copy. Mutating the
                # shared `by` here would let a `session.status` set for one
                # value leak into later values whose status is None.
                value_by = dict(by)
                if status_value.session_status is not None:
                    value_by["session.status"] = status_value.session_status

                group_key: GroupKey = tuple(sorted(value_by.items()))
                group: Group = groups[group_key]

                value = status_value.value
                if value is not None:
                    value = finite_or_none(value)

                if key.bucketed_time is None:
                    group["totals"][output_field.get_name()] = value
                else:
                    index = timestamp_index[key.bucketed_time]
                    group["series"][output_field.get_name()][index] = value

    groups_as_list: List[SessionsQueryGroup] = [
        {
            "by": dict(group_by),
            "totals": group_data["totals"],
            "series": group_data["series"],
        }
        for group_by, group_data in groups.items()
    ]

    def format_datetime(dt: datetime) -> str:
        return dt.isoformat().replace("+00:00", "Z")

    return {
        "start": format_datetime(query_clone.start),
        "end": format_datetime(query_clone.end),
        "query": query_clone.query,
        "intervals": [format_datetime(dt) for dt in intervals],
        "groups": groups_as_list,
    }