def aggregations(request):
    """
    Fetch aggregation data for a probe.

    Expects a JSON object in the body containing the query parameters, e.g.::

        {
            "query": {
                "channel": "nightly",
                "probe": "gc_ms",
                "versions": ["70"],  # OR ["70", "69", "68"]
                "aggregationLevel": "version"  # OR "build_id"
            }
        }

    An optional ``"os"`` key is also read from the query; when it is absent
    the rows are filtered on ``os=None``.

    Returns a JSON object containing the histogram data and metadata, e.g.::

        {
            "response": [
                {
                    "data": [
                        {
                            "client_agg_type": "summed-histogram",
                            "histogram": {
                                "0": 0.0,
                                "1": 1920.963,
                                ...
                            },
                            "percentiles": {
                                "0": 1.0,
                                "10": 1.0259,
                                ...
                            },
                            "total_users": 1604
                        }
                    ],
                    "metadata": {
                        "build_id": null,
                        "channel": "nightly",
                        "metric": "gc_ms",
                        "metric_type": "histogram-exponential",
                        "os": "Linux",
                        "version": "70"
                    }
                }
            ]
        }

    Raises:
        ValidationError: the body or its "query" is not a JSON object, a
            required parameter is missing, the channel is unknown, or
            "versions" is neither a list nor a single version.
        NotFound: no rows match the given parameters.
    """
    labels_cache = caches["probe-labels"]
    if labels_cache.get("__labels__") is None:
        Probe.populate_labels_cache()

    REQUIRED_QUERY_PARAMETERS = ["channel", "probe", "versions", "aggregationLevel"]

    body = request.data
    # A JSON array/scalar body, or a "query" that is not an object, has no
    # keys to inspect; reject it here instead of failing with AttributeError.
    q = body.get("query") if isinstance(body, dict) else None
    if not isinstance(q, dict):
        raise ValidationError("Unexpected JSON body")

    missing = set(REQUIRED_QUERY_PARAMETERS) - set(q)
    if missing:
        raise ValidationError(
            "Missing required query parameters: {}".format(", ".join(sorted(missing)))
        )

    # Translate the channel name into its id; an unknown (or unhashable)
    # channel is a client error, not a server error.
    try:
        channel_id = CHANNEL_IDS[q["channel"]]
    except (KeyError, TypeError):
        raise ValidationError("Unknown channel: {}".format(q["channel"]))

    # Accept a single version as well as a list. Iterating a bare string
    # would otherwise split it into characters ("70" -> "7", "0").
    versions = q["versions"]
    if isinstance(versions, (str, int)):
        versions = [versions]
    elif not isinstance(versions, (list, tuple)):
        raise ValidationError("versions must be a list of versions")

    dimensions = [
        Q(metric=q["probe"]),
        Q(channel=channel_id),
        Q(version__in=[str(version) for version in versions]),
        Q(os=q.get("os")),
    ]

    # Whether to pull aggregations by version or build_id. Any other value
    # adds no build_id filter at all.
    if q["aggregationLevel"] == "version":
        dimensions.append(Q(build_id=None))
    elif q["aggregationLevel"] == "build_id":
        dimensions.append(~Q(build_id=None))

    result = Aggregation.objects.filter(*dimensions)

    response = {}

    for row in result:
        metadata = {
            "channel": CHANNEL_NAMES[row.channel],
            "version": row.version,
            "os": row.os,
            "build_id": row.build_id,
            "metric": row.metric,
            "metric_type": row.metric_type,
        }
        aggs = {d["key"]: round(d["value"], 4) for d in row.data}

        # We use these keys to merge data dictionaries: rows sharing `key`
        # end up in one response entry, and rows that also share `sub_key`
        # are merged into a single "data" dictionary within it.
        key = "{channel}-{version}-{metric}-{os}-{build_id}".format(**metadata)
        sub_key = "{key}-{client_agg_type}".format(
            key=row.metric_key, client_agg_type=row.client_agg_type
        )

        new_data = {}

        if row.agg_type == AGGREGATION_HISTOGRAM:
            new_data["total_users"] = row.total_users
            # Check for labels.
            labels = labels_cache.get(metadata["metric"])
            if labels is not None:
                # Replace the numeric indexes with their labels, dropping
                # indexes that have no corresponding label.
                aggs_w_labels = {}
                for k, v in aggs.items():
                    try:
                        aggs_w_labels[labels[int(k)]] = v
                    except IndexError:
                        pass
                aggs = aggs_w_labels

        new_data[AGGREGATION_NAMES[row.agg_type]] = aggs

        if row.metric_key:
            new_data["key"] = row.metric_key

        if row.client_agg_type:
            new_data["client_agg_type"] = row.client_agg_type

        # The first row for a key contributes the metadata; later rows only
        # merge their data into the matching sub-record.
        record = response.setdefault(key, {"metadata": metadata})
        record.setdefault(sub_key, {}).setdefault("data", {}).update(new_data)

    if not response:
        raise NotFound("No documents found for the given parameters")

    # Strip out the merge keys when returning the response. "metadata" is
    # popped first so that only the sub-records remain in `r.values()`.
    return Response(
        {
            "response": [
                {
                    "metadata": r.pop("metadata"),
                    "data": [d["data"] for d in r.values()],
                }
                for r in response.values()
            ]
        }
    )
def get_firefox_aggregations(request, **kwargs):
    """
    Return aggregation rows for a Firefox desktop probe.

    Keyword arguments:
        channel: required; combined with the product to select the model
            ("nightly", "beta" or "release").
        probe: required; the metric name to filter on.
        aggregationLevel: required; "version" or "build_id".
        versions: optional number of most recent versions to include,
            counting down from the highest version stored (default 3).
        os: optional OS filter (default "*").
        process: optional process filter; only applied when provided.

    Returns:
        A list of dictionaries, one per matching row, holding the row's
        dimensions, parsed histogram/percentiles and the
        "total_addressable_market" count looked up for its version/build.

    Raises:
        ValidationError: a required parameter is missing, aggregationLevel
            or versions is invalid, the channel is not supported, or the
            maximum version cannot be determined.
        PermissionDenied: release channel data requested by an
            unauthenticated user.
        NotFound: the selected model holds no data.
    """
    # TODO: When glam starts sending "product", make it required.
    REQUIRED_QUERY_PARAMETERS = ["channel", "probe", "aggregationLevel"]
    missing = set(REQUIRED_QUERY_PARAMETERS) - set(kwargs)
    if missing:
        raise ValidationError(
            "Missing required query parameters: {}".format(", ".join(sorted(missing)))
        )

    # Reject unknown aggregation levels up front. Previously they left
    # `counts` unbound and crashed while building the response.
    aggregation_level = kwargs["aggregationLevel"]
    if aggregation_level not in ("version", "build_id"):
        raise ValidationError("aggregationLevel must be one of: build_id, version")

    # Ensure that the product provided is one we support, defaulting to Firefox.
    product = "firefox"
    channel = kwargs.get("channel")
    model_key = f"{product}-{channel}"

    # If release channel, make sure the user is authenticated.
    if channel == constants.CHANNEL_NAMES[constants.CHANNEL_RELEASE]:
        if not request.user.is_authenticated:
            raise PermissionDenied()

    MODEL_MAP = {
        "firefox-nightly": DesktopNightlyAggregationView,
        "firefox-beta": DesktopBetaAggregationView,
        "firefox-release": DesktopReleaseAggregationView,
    }

    model = MODEL_MAP.get(model_key)
    if model is None:
        raise ValidationError("Product not currently supported.")

    # The version count may arrive as a JSON string; coerce it so the range
    # arithmetic below cannot fail with an uncaught TypeError.
    try:
        num_versions = int(kwargs.get("versions", 3))
    except (TypeError, ValueError):
        raise ValidationError("versions must be an integer")

    try:
        max_version = int(model.objects.aggregate(Max("version"))["version__max"])
    except (ValueError, KeyError):
        raise ValidationError("Query version cannot be determined")
    except TypeError:
        # This happens when `version_max` is NULL and cannot be converted to an int,
        # suggesting that we have no data for this model.
        raise NotFound("No data found for the provided parameters")

    # The `num_versions` most recent versions, newest first.
    versions = [
        str(version)
        for version in range(max_version, max_version - num_versions, -1)
    ]

    labels_cache = caches["probe-labels"]
    if labels_cache.get("__labels__") is None:
        Probe.populate_labels_cache()

    os_name = kwargs.get("os", "*")

    dimensions = [
        Q(metric=kwargs["probe"]),
        Q(version__in=versions),
        Q(os=os_name),
    ]

    # Whether to pull aggregations by version or build_id: version-level
    # rows carry build_id "*", build-level rows carry anything else.
    by_build = aggregation_level == "build_id"
    dimensions.append(~Q(build_id="*") if by_build else Q(build_id="*"))
    counts = _get_firefox_counts(channel, os_name, versions, by_build=by_build)

    if "process" in kwargs:
        dimensions.append(Q(process=kwargs["process"]))

    result = model.objects.filter(*dimensions)

    response = []

    for row in result:
        data = {
            "version": row.version,
            "os": row.os,
            "build_id": row.build_id,
            "process": row.process,
            "metric": row.metric,
            "metric_key": row.metric_key,
            "metric_type": row.metric_type,
            "total_users": row.total_users,
            # A missing payload, or one that parses to an empty/falsy value,
            # is reported as an empty string.
            "histogram": row.histogram and orjson.loads(row.histogram) or "",
            "percentiles": row.percentiles and orjson.loads(row.percentiles) or "",
        }
        if row.client_agg_type:
            if row.metric_type == "boolean":
                data["client_agg_type"] = "boolean-histogram"
            else:
                data["client_agg_type"] = row.client_agg_type

        # Get the total distinct client IDs for this set of dimensions.
        data["total_addressable_market"] = counts.get(f"{row.version}-{row.build_id}")

        response.append(data)

    return response