예제 #1
0
    def translate_results(self):
        """Convert the raw per-entity query results into a list of grouped rows.

        Every row under ``"totals"`` (and ``"series"``, when that key is present)
        of each entry in ``self._results`` is folded into a per-group accumulator
        by ``self._extract_data``. Each group then becomes a dict made of ``by``
        (the group's columns) plus the accumulated ``totals`` and ``series``.
        Post-query functions are applied for every ``(op, metric_name)`` in
        ``self._bottom_up_dependency_tree``, and finally every field that is not
        in ``self._query_definition_fields_set`` is removed from the output.

        Returns:
            A list with one dict per group.
        """
        groups = {}
        for subresults in self._results.values():
            for data in subresults["totals"]["data"]:
                self._extract_data(data, groups)

            if "series" in subresults:
                for data in subresults["series"]["data"]:
                    self._extract_data(data, groups)

        # Tag columns get their key parsed and their value reverse-resolved;
        # allowed group-by columns are passed through untouched.
        groups = [
            dict(
                by=dict(
                    (key, value)
                    if key in ALLOWED_GROUPBY_COLUMNS
                    else (self._parse_tag(key), reverse_resolve_weak(value))
                    for key, value in tags
                ),
                **data,
            )
            for tags, data in groups.items()
        ]

        # Applying post query operations for totals and series
        num_intervals = len(self._intervals)
        for group in groups:
            totals, series = group["totals"], group["series"]
            for op, metric_name in self._bottom_up_dependency_tree:
                metric_obj = metric_object_factory(op=op, metric_name=metric_name)
                # Totals
                totals[metric_name] = metric_obj.run_post_query_function(totals)
                # Series
                if not num_intervals:
                    # Nothing to fill in; also avoids inserting an empty series
                    # entry that would not otherwise exist.
                    continue
                # The placeholder list is created once per metric; every slot is
                # overwritten below, so sharing one default object is harmless.
                series_values = series.setdefault(
                    metric_name,
                    [metric_obj.generate_default_null_values()] * num_intervals,
                )
                for idx in range(num_intervals):
                    series_values[idx] = metric_obj.run_post_query_function(series, idx)

        # Remove the extra fields added due to the constituent metrics that were added
        # from the generated dependency tree. These metrics were only needed to compute
        # fields that require post-query processing, and are neither required nor
        # expected in the response.
        for group in groups:
            totals, series = group["totals"], group["series"]
            # Snapshot the keys because entries are deleted while looping.
            for key in list(totals):
                matches = FIELD_REGEX.match(key)
                if matches:
                    field = (matches[1], matches[2])
                else:
                    field = (None, key)
                if field not in self._query_definition_fields_set:
                    del totals[key]
                    # A totals key does not necessarily have a series
                    # counterpart, so remove it tolerantly instead of raising
                    # KeyError half-way through the cleanup.
                    series.pop(key, None)

        return groups
예제 #2
0
 def _build_orderby(self, query_definition: QueryDefinition) -> Optional[List[OrderBy]]:
     """Build the order-by clause for ``query_definition``.

     Returns ``None`` when the query definition has no ``orderby``. Otherwise
     the clause is produced by the metric object created for the order-by
     field's ``(op, metric_name)`` pair, using ``self._projects`` and the
     requested direction.
     """
     orderby = query_definition.orderby
     if orderby is None:
         return None

     field, direction = orderby
     op, metric_name = field
     return metric_object_factory(op, metric_name).generate_orderby_clause(
         projects=self._projects, direction=direction
     )
예제 #3
0
    def _extract_data(self, data, groups):
        """Fold one result row into the per-group accumulator ``groups``.

        The row's group is identified by the sorted ``(column, value)`` pairs of
        its ``tags[...]`` columns and of any column in ``ALLOWED_GROUPBY_COLUMNS``.
        A row carrying a ``TS_COL_GROUP`` value is a series row and fills the
        matching interval slot; a row without one is a totals row. In both cases
        a value that already differs from the metric's default is never
        overwritten.

        Args:
            data: A single result row; ``TS_COL_GROUP`` is popped from it.
            groups: Dict keyed by group tuple, holding a
                ``{"totals": ..., "series": ...}`` dict per group. Updated in place.
        """
        group_key = tuple(
            (column, data[column])
            for column in sorted(data.keys())
            if column.startswith("tags[") or column in ALLOWED_GROUPBY_COLUMNS
        )
        bucket = groups.setdefault(group_key, {"totals": {}, "series": {}})

        timestamp = data.pop(TS_COL_GROUP, None)
        if timestamp is not None:
            timestamp = parse_snuba_datetime(timestamp)
        is_totals_row = timestamp is None

        totals = bucket["totals"]
        series_by_key = bucket["series"]

        # The fields iterated here are the union of the query definition fields and the
        # constituent fields needed by derived metrics, so that composite derived metrics
        # can later be computed from their constituents.
        for op, metric_name in self._set_of_constituent_queries:
            key = metric_name if not op else f"{op}({metric_name})"
            null_value = metric_object_factory(op, metric_name).generate_default_null_values()

            try:
                raw = data[key]
            except KeyError:
                # Happens for derived metrics that only exist after post query
                # operations (no direct column in the result), or for raw metrics
                # that have no data yet.
                cleaned = null_value
            else:
                # Percentile operations come back wrapped in a sequence.
                cleaned = finite_or_none(raw[0] if op in OPERATIONS_PERCENTILES else raw)

            if is_totals_row:
                # Only write when the key is missing or still holds the default.
                if key not in totals or totals[key] == null_value:
                    totals[key] = cleaned
                # A total that is still the default gets a placeholder series.
                if totals[key] == null_value:
                    series_by_key.setdefault(key, len(self._intervals) * [null_value])
                continue

            slots = series_by_key.setdefault(key, len(self._intervals) * [null_value])
            position = self._timestamp_index[timestamp]
            if slots[position] == null_value:
                slots[position] = cleaned
예제 #4
0
 def __update_query_dicts_with_component_entities(self, component_entities,
                                                  metric_name_to_obj_dict):
     """Register the metrics of ``component_entities`` that are not yet known.

     For every metric name listed under an entity, a ``(None, metric_name)``
     key is added to ``metric_name_to_obj_dict`` (mapped to the object built by
     ``metric_object_factory``) and appended to that entity's list in
     ``self._fields_in_entities``. Keys already present are left untouched.

     Returns:
         The same ``metric_name_to_obj_dict`` instance, updated in place.
     """
     # Only raw metrics are supported as constituents of a derived metric for now, so
     # the operation part of the key is always None.
     # ToDo(ahmed): if dependency metrics are later allowed to carry an aggregate, the
     #  op would have to be parsed here instead.
     for entity, names in component_entities.items():
         for name in names:
             field = (None, name)
             if field in metric_name_to_obj_dict:
                 continue
             metric_name_to_obj_dict[field] = metric_object_factory(None, name)
             self._fields_in_entities.setdefault(entity, []).append(field)
     return metric_name_to_obj_dict
예제 #5
0
    def _build_queries(self, query_definition):
        """Build the totals/series queries for each entity used by the requested fields.

        Fields are grouped by the entity reported by their metric object. Fields
        whose metric does not exist, or whose entity is falsy, are skipped. One
        query set is then built per entity via
        ``self._build_totals_and_series_queries``.

        Returns:
            A dict mapping each entity to the result of
            ``self._build_totals_and_series_queries`` for that entity.

        Raises:
            NotImplementedError: If a field resolves to an entity that is not in
                ``self._implemented_datasets``.
        """
        field_objects = {}
        fields_per_entity = OrderedDict()

        for op, metric_name in query_definition.fields.values():
            metric = metric_object_factory(op, metric_name)
            # `get_entity` is called first: it fetches the entities of the constituent
            # metrics and, for a SingularEntityDerivedMetric in particular, validates
            # that all constituents belong to the same entity.
            try:
                entity = metric.get_entity(projects=self._projects)
            except MetricDoesNotExistException:
                # At least one constituent of a derived metric does not exist, so
                # the metric is not queried and its field falls back to the default
                # value in the response.
                entity = None

            if not entity:
                # ToDo(ahmed): a MetricFieldBase instance without an entity comes from a
                #  composite entity derived metric. Its constituent dependency tree needs
                #  to be walked down to the SingleEntityDerivedMetric instances, which
                #  must be added to the queries so the composite metric can be computed
                #  afterwards as a post query operation on their results.
                continue

            if entity not in self._implemented_datasets:
                raise NotImplementedError(f"Dataset not yet implemented: {entity}")

            field_key = (op, metric_name)
            field_objects[field_key] = metric
            fields_per_entity.setdefault(entity, []).append(field_key)

        base_where = self._build_where(query_definition)
        groupby = self._build_groupby(query_definition)

        queries = {}
        for entity, field_keys in fields_per_entity.items():
            select, metric_ids = [], set()
            for field_key in field_keys:
                metric = field_objects[field_key]
                select.extend(metric.generate_select_statements(projects=self._projects))
                metric_ids |= metric.generate_metric_ids()

            # Restrict this entity's query to the metric ids its fields need.
            metric_filter = Condition(Column("metric_id"), Op.IN, list(metric_ids))
            orderby = self._build_orderby(query_definition)

            queries[entity] = self._build_totals_and_series_queries(
                entity=entity,
                select=select,
                where=base_where + [metric_filter],
                groupby=groupby,
                orderby=orderby,
                limit=query_definition.limit,
                offset=query_definition.offset,
                rollup=query_definition.rollup,
                intervals_len=len(list(get_intervals(query_definition))),
            )

        return queries