Ejemplo n.º 1
0
def aggregations(request):
    """
    Fetches aggregation data.

    Expects a JSON object in the body containing the query parameters, e.g.::

        {
            "query": {
                "channel": "nightly",
                "probe": "gc_ms",
                "versions": ["70"],  # OR ["70", "69", "68"]
                "aggregationLevel": "version"  # OR "build_id"
            }
        }

    An optional ``"os"`` entry is also read from the query; when it is
    absent the rows are filtered with ``os=None``.

    Returns a JSON object containing the histogram data and metadata, e.g.::

        {
            "response": [
                {
                    "data": [
                        {
                            "client_agg_type": "summed-histogram",
                            "histogram": {
                                "0": 0.0,
                                "1": 1920.963,
                                ...
                            },
                            "percentiles": {
                                "0": 1.0,
                                "10": 1.0259,
                                ...
                            },
                            "total_users": 1604
                        }
                    ],
                    "metadata": {
                        "build_id": null,
                        "channel": "nightly",
                        "metric": "gc_ms",
                        "metric_type": "histogram-exponential",
                        "os": "Linux",
                        "version": "70"
                    }
                }
            ]
        }

    Raises ``ValidationError`` when the body has no ``"query"`` object, when
    a required query parameter is missing, or when the channel is not a key
    of ``CHANNEL_IDS``. Raises ``NotFound`` when no rows match the query.

    """
    labels_cache = caches["probe-labels"]
    # The "__labels__" entry is used as the marker that the cache is filled.
    if labels_cache.get("__labels__") is None:
        Probe.populate_labels_cache()

    REQUIRED_QUERY_PARAMETERS = [
        "channel", "probe", "versions", "aggregationLevel"
    ]
    body = request.data

    if body is None or body.get("query") is None:
        raise ValidationError("Unexpected JSON body")

    q = body["query"]

    # Compute the missing parameters once and report all of them together.
    missing = set(REQUIRED_QUERY_PARAMETERS) - set(q.keys())
    if missing:
        raise ValidationError("Missing required query parameters: {}".format(
            ", ".join(sorted(missing))))

    # An unknown (or unhashable) channel is a client error; report it as a
    # validation failure rather than letting the lookup error escape.
    try:
        channel_id = CHANNEL_IDS[q["channel"]]
    except (KeyError, TypeError):
        raise ValidationError("Unknown channel: {}".format(q["channel"]))

    dimensions = [
        Q(metric=q["probe"]),
        Q(channel=channel_id),
        # Use a real list instead of a one-shot ``map`` iterator.
        Q(version__in=[str(version) for version in q["versions"]]),
        Q(os=q.get("os")),
    ]

    # Whether to pull aggregations by version or build_id.
    # NOTE(review): any other aggregationLevel value adds no build_id filter,
    # so rows of both kinds are returned - confirm whether that is intended.
    if q["aggregationLevel"] == "version":
        dimensions.append(Q(build_id=None))
    elif q["aggregationLevel"] == "build_id":
        dimensions.append(~Q(build_id=None))

    result = Aggregation.objects.filter(*dimensions)

    # Maps a merge key (one per metadata combination) to a record holding the
    # shared "metadata" plus one {"data": {...}} entry per sub key.
    response = {}

    for row in result:

        metadata = {
            "channel": CHANNEL_NAMES[row.channel],
            "version": row.version,
            "os": row.os,
            "build_id": row.build_id,
            "metric": row.metric,
            "metric_type": row.metric_type,
        }
        aggs = {d["key"]: round(d["value"], 4) for d in row.data}

        # We use these keys to merge data dictionaries.
        key = "{channel}-{version}-{metric}-{os}-{build_id}".format(**metadata)
        sub_key = "{key}-{client_agg_type}".format(
            key=row.metric_key, client_agg_type=row.client_agg_type)

        # The first row seen for a merge key supplies the metadata.
        record = response.setdefault(key, {"metadata": metadata})

        new_data = {}

        if row.agg_type == AGGREGATION_HISTOGRAM:
            new_data["total_users"] = row.total_users
            # Check for labels.
            labels = labels_cache.get(metadata["metric"])
            if labels is not None:
                # Replace the numeric indexes with their labels. Buckets
                # whose index has no label are dropped.
                aggs_w_labels = {}
                for k, v in aggs.items():
                    try:
                        aggs_w_labels[labels[int(k)]] = v
                    except IndexError:
                        pass
                aggs = aggs_w_labels

        new_data[AGGREGATION_NAMES[row.agg_type]] = aggs

        if row.metric_key:
            new_data["key"] = row.metric_key

        if row.client_agg_type:
            new_data["client_agg_type"] = row.client_agg_type

        # Rows sharing a sub key are merged into a single data dictionary;
        # later rows overwrite entries with the same name.
        entry = record.setdefault(sub_key, {})
        entry.setdefault("data", {}).update(new_data)

    if not response:
        raise NotFound("No documents found for the given parameters")

    # Strip out the merge keys when returning the response.
    merged = []
    for record in response.values():
        # Remove the metadata first so that only sub-key entries remain.
        record_metadata = record.pop("metadata")
        merged.append({
            "metadata": record_metadata,
            "data": [entry["data"] for entry in record.values()],
        })

    return Response({"response": merged})
Ejemplo n.º 2
0
def get_firefox_aggregations(request, **kwargs):
    """
    Fetch Firefox desktop aggregation rows for a probe.

    The query parameters are passed as keyword arguments:

    * ``channel``, ``probe`` and ``aggregationLevel`` are required.
      ``aggregationLevel`` must be ``"version"`` or ``"build_id"``.
    * ``versions`` (default 3) is how many versions to include, counting
      down from the highest version stored for the channel's model.
    * ``os`` (default ``"*"``) and ``process`` (no filter when absent) are
      optional.

    Returns a list with one dict per matching row, containing the row's
    dimensions, its decoded histogram and percentiles, ``total_users`` and
    ``total_addressable_market``.

    Raises ``ValidationError`` for missing or invalid parameters or an
    unsupported channel, ``PermissionDenied`` for unauthenticated access to
    the release channel, and ``NotFound`` when the model has no data.
    """
    # TODO: When glam starts sending "product", make it required.
    REQUIRED_QUERY_PARAMETERS = ["channel", "probe", "aggregationLevel"]
    AGGREGATION_LEVELS = ("version", "build_id")

    # Compute the missing parameters once and report all of them together.
    missing = set(REQUIRED_QUERY_PARAMETERS) - set(kwargs)
    if missing:
        raise ValidationError(
            "Missing required query parameters: {}".format(", ".join(sorted(missing)))
        )

    # Reject unknown levels up front: further down only the two supported
    # levels produce the per-version/build client counts, so anything else
    # used to fail with an unbound local once the first row was processed.
    aggregation_level = kwargs["aggregationLevel"]
    if aggregation_level not in AGGREGATION_LEVELS:
        raise ValidationError(f"Unsupported aggregationLevel: {aggregation_level}")

    # Accept numeric strings (e.g. "3") as well as integers.
    try:
        num_versions = int(kwargs.get("versions", 3))
    except (TypeError, ValueError) as err:
        raise ValidationError("versions must be an integer") from err

    # Ensure that the product provided is one we support, defaulting to Firefox.
    product = "firefox"
    channel = kwargs.get("channel")
    model_key = f"{product}-{channel}"

    # If release channel, make sure the user is authenticated.
    if channel == constants.CHANNEL_NAMES[constants.CHANNEL_RELEASE]:
        if not request.user.is_authenticated:
            raise PermissionDenied()

    MODEL_MAP = {
        "firefox-nightly": DesktopNightlyAggregationView,
        "firefox-beta": DesktopBetaAggregationView,
        "firefox-release": DesktopReleaseAggregationView,
    }

    try:
        model = MODEL_MAP[model_key]
    except KeyError as err:
        raise ValidationError("Product not currently supported.") from err

    try:
        max_version = int(model.objects.aggregate(Max("version"))["version__max"])
    except (ValueError, KeyError) as err:
        raise ValidationError("Query version cannot be determined") from err
    except TypeError as err:
        # This happens when `version_max` is NULL and cannot be converted to an int,
        # suggesting that we have no data for this model.
        raise NotFound("No data found for the provided parameters") from err

    # The newest `num_versions` versions, highest first, as strings.
    versions = [str(v) for v in range(max_version, max_version - num_versions, -1)]

    labels_cache = caches["probe-labels"]
    # The "__labels__" entry is used as the marker that the cache is filled.
    if labels_cache.get("__labels__") is None:
        Probe.populate_labels_cache()

    # Named `os_name` so the local does not shadow the stdlib module name.
    os_name = kwargs.get("os", "*")

    dimensions = [
        Q(metric=kwargs["probe"]),
        Q(version__in=versions),
        Q(os=os_name),
    ]

    # Whether to pull aggregations by version or build_id. Version-level rows
    # are the ones whose build_id is "*"; build-level rows are all the others.
    by_build = aggregation_level == "build_id"
    all_builds = Q(build_id="*")
    dimensions.append(~all_builds if by_build else all_builds)
    counts = _get_firefox_counts(channel, os_name, versions, by_build=by_build)

    if "process" in kwargs:
        dimensions.append(Q(process=kwargs["process"]))
    result = model.objects.filter(*dimensions)

    response = []

    for row in result:

        data = {
            "version": row.version,
            "os": row.os,
            "build_id": row.build_id,
            "process": row.process,
            "metric": row.metric,
            "metric_key": row.metric_key,
            "metric_type": row.metric_type,
            "total_users": row.total_users,
            # `and ... or ""` is kept on purpose: it yields "" both when the
            # column is empty and when the decoded JSON value is falsy.
            "histogram": row.histogram and orjson.loads(row.histogram) or "",
            "percentiles": row.percentiles and orjson.loads(row.percentiles) or "",
        }
        if row.client_agg_type:
            # Boolean metrics always report the "boolean-histogram" type.
            if row.metric_type == "boolean":
                data["client_agg_type"] = "boolean-histogram"
            else:
                data["client_agg_type"] = row.client_agg_type

        # Get the total distinct client IDs for this set of dimensions.
        data["total_addressable_market"] = counts.get(f"{row.version}-{row.build_id}")

        response.append(data)

    return response