def _histogram(self, prefix, channel, version, dates, metric, value, expected_count):
    """Check the aggregates served for a single, non-keyed histogram metric.

    Fetches the metric through ``self.app`` for all of ``dates`` and asserts,
    for every returned entry, the ping count, the sum and the bucket contents.

    ``value`` is the histogram payload dict; this reads ``histogram_type`` and
    ``sum`` from it and, for ``USE_COUNTER2_`` metrics, also writes
    ``value['values']['0']`` (the payload is mutated in place).
    """
    if metric.endswith('CONTENT_DOCUMENTS_DESTROYED'):
        # Ignore USE_COUNTER2_ support histograms
        return

    url = '/aggregates_by/{}/channels/{}/?version={}&dates={}&metric={}'.format(
        prefix, channel, version, ','.join(dates), metric)
    resp = self.as_json(self.app.get(url))
    self.assertEqual(len(resp['data']), len(dates))

    bucket_index = COUNT_HISTOGRAM_LABELS.index(COUNT_SCALAR_BUCKET)

    for res in resp['data']:
        # From pings before bug 1218576 (old), `count` is the number of processes.
        # From pings after bug 1218576 (new), `count` is the number of process types.
        num_old_pings = NUM_PINGS_PER_DIMENSIONS - NUM_AGGREGATED_CHILD_PINGS
        old_pings_expected_count = expected_count * num_old_pings / NUM_PINGS_PER_DIMENSIONS
        new_pings_expected_count = expected_count * NUM_AGGREGATED_CHILD_PINGS / NUM_PINGS_PER_DIMENSIONS
        count_from_new = new_pings_expected_count * NUM_PROCESS_TYPES
        count_from_old = old_pings_expected_count * (NUM_CHILDREN_PER_PING + 1)
        self.assertEqual(res['count'], count_from_new + count_from_old)

        if value['histogram_type'] == 4:
            # Count histogram: everything is expected in the scalar bucket.
            current = pd.Series(res['histogram'], index=map(int, resp['buckets']))
            expected = pd.Series(index=COUNT_HISTOGRAM_LABELS, data=0)
            expected[COUNT_SCALAR_BUCKET] = res['count']
            self.assertEqual(res['histogram'][bucket_index], res['count'])
            self.assertEqual(res['sum'], value['sum'] * res['count'])
            self.assertTrue((current == expected).all())
            continue

        if metric.startswith('USE_COUNTER2_'):
            # The "0" bucket of a use counter is derived from the matching
            # *CONTENT_DOCUMENTS_DESTROYED support histogram.
            if metric.endswith('_PAGE'):
                support_name = 'TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED'
            else:
                support_name = 'CONTENT_DOCUMENTS_DESTROYED'
            destroyed = histograms_template[support_name]['sum']
            value['values']['0'] = destroyed - value['values']['1']
            index_type = int
        else:
            # Categorical histograms (type 5) are indexed by string labels.
            index_type = str if value['histogram_type'] == 5 else int

        current = pd.Series(res['histogram'], index=map(index_type, resp['buckets']))
        expected = Histogram(metric, value).get_value() * res['count']
        self.assertTrue((current == expected).all())
        self.assertEqual(res['sum'], value['sum'] * res['count'])
def test_categorical_histogram_dict_value():
    """Build the categorical test histogram from a ``{'values': {...}}`` dict.

    Every entry of the resulting value must equal the corresponding entry of
    the ``series`` global.
    """
    payload = {'values': {u'0': 2, u'1': 1, u'2': 0, u'3': 0}}
    from_dict = Histogram('TELEMETRY_TEST_CATEGORICAL', payload)
    matches = from_dict.get_value() == series
    assert all(matches)
def get_dates_metrics(prefix, channel):
    """Return the aggregates of one metric for a list of dates, as JSON.

    Reads its inputs from ``request.args``:
      * ``dates``   - comma-separated list of dates (required);
      * ``version`` - version to query (required);
      * ``metric``  - metric name (required);
      * every remaining parameter is forwarded to the query as a filter
        dimension, with the literal strings "true"/"false" turned into
        booleans.

    Returns a JSON string with the keys ``data`` (one entry per result row:
    date, label, histogram, count, sum), ``buckets``, ``kind`` and
    ``description``.

    Aborts with a 404 when a required parameter is missing or empty, when the
    metric is "SEARCH_COUNTS", when no histogram definition is found for the
    metric, or when the query yields no rows.
    """
    mapping = {"true": True, "false": False}
    dimensions = {k: mapping.get(v, v) for k, v in request.args.iteritems()}

    # Get dates.
    # Read the raw string rather than the boolean-mapped value (so that
    # "dates=true" cannot end up calling .split on a bool), and drop empty
    # entries: "".split(",") is [""], which is truthy and used to let a
    # missing `dates` parameter slip past the check below.
    dimensions.pop('dates', None)
    dates = [d for d in request.args.get('dates', "").split(',') if d]
    version = dimensions.pop('version', None)
    metric = dimensions.get('metric')

    if not dates or not version or not metric:
        abort(404)

    if metric == "SEARCH_COUNTS":
        abort(404)

    # Get bucket labels
    if metric.startswith("SIMPLE_MEASURES_"):
        labels = simple_measures_labels
        kind = "exponential"
        description = ""
    else:
        # Use nightly revision if the channel is unknown
        revision = histogram_revision_map.get(channel, "nightly")

        try:
            definition = Histogram(metric, {"values": {}}, revision=revision)
        except KeyError:
            # Couldn't find the histogram definition
            abort(404)

        kind = definition.kind
        description = definition.definition.description()

        if kind == "count":
            labels = count_histogram_labels
            # Count histograms are queried under a prefixed metric name.
            dimensions["metric"] = "[[COUNT]]_{}".format(metric)
        elif kind == "flag":
            labels = [0, 1]
        else:
            labels = definition.get_value().keys().tolist()

    # Fetch metrics
    result = execute_query("select * from batched_get_metric(%s, %s, %s, %s, %s)",
                           (prefix, channel, version, dates, json.dumps(dimensions)))
    if not result:
        abort(404)

    pretty_result = {"data": [], "buckets": labels, "kind": kind, "description": description}
    for row in result:
        # row[2] holds the bucket values followed by the sum and the count.
        aggregate = row[2]
        pretty_result["data"].append({
            "date": row[0],
            "label": row[1],
            "histogram": aggregate[:-2],
            "count": aggregate[-1],
            "sum": aggregate[-2],
        })

    return json.dumps(pretty_result)
def _get_complete_histogram(channel, metric, values):
    """Expand sparse bucket ``values`` into a histogram covering every bucket.

    Metrics whose name starts with one of the ``SCALAR_MEASURE_MAP`` prefixes
    are laid out on that prefix's labels, with absent buckets filled with 0.
    Any other metric is expanded through ``Histogram`` for the channel's
    revision. Returns a dict mapping ``str(bucket)`` to ``long(count)``.
    """
    # Looked up unconditionally, so an unknown channel raises KeyError
    # regardless of the metric.
    revision = histogram_revision_map[channel]

    histogram = None
    for prefix, labels in SCALAR_MEASURE_MAP.iteritems():
        if not metric.startswith(prefix):
            continue
        by_bucket = dict((int(key), count) for key, count in values.iteritems())
        histogram = pd.Series(by_bucket, index=labels).fillna(0)
        break

    if histogram is None:
        # No scalar prefix matched: treat the metric as a regular histogram.
        definition = Histogram(metric, {"values": values}, revision=revision)
        histogram = definition.get_value(autocast=False)

    complete = {}
    for bucket, count in histogram.to_dict().iteritems():
        complete[str(bucket)] = long(count)
    return complete
def _get_complete_histogram(channel, metric, values):
    """Return ``values`` as a fully populated ``{str(bucket): long(count)}`` dict.

    A metric matching a prefix in ``SCALAR_MEASURE_MAP`` is placed on that
    prefix's labels (missing buckets become 0); otherwise the buckets come
    from ``Histogram(...).get_value(autocast=False)`` at the channel's revision.
    """
    # Resolved first: an unknown channel is a KeyError even for scalar metrics.
    revision = histogram_revision_map[channel]

    matched = False
    for prefix, labels in SCALAR_MEASURE_MAP.iteritems():
        if metric.startswith(prefix):
            sparse = {}
            for key, count in values.iteritems():
                sparse[int(key)] = count
            histogram = pd.Series(sparse, index=labels).fillna(0)
            matched = True
            break

    if not matched:
        payload = {"values": values}
        histogram = Histogram(metric, payload, revision=revision).get_value(autocast=False)

    as_dict = histogram.to_dict()
    return dict((str(bucket), long(count)) for bucket, count in as_dict.iteritems())
def test_histogram(prefix, channel, version, dates, metric, value, expected_count):
    """Check the aggregates the running service returns for one histogram.

    Queries ``SERVICE_URI`` for ``metric`` over ``dates`` and asserts the
    count, sum and bucket contents of every returned entry. For
    ``USE_COUNTER2_`` metrics ``value['values']['0']`` is written in place.
    """
    if metric.endswith("CONTENT_DOCUMENTS_DESTROYED"):
        # Ignore USE_COUNTER2_ support histograms
        return

    url = "{}/aggregates_by/{}/channels/{}?version={}&dates={}&metric={}".format(
        SERVICE_URI, prefix, channel, version, ",".join(dates), metric)
    reply = requests.get(url).json()
    assert len(reply["data"]) == len(dates)

    bucket_index = count_histogram_labels.index(COUNT_SCALAR_BUCKET)

    for res in reply["data"]:
        assert res["count"] == expected_count * (NUM_CHILDREN_PER_PING + 1)

        if value["histogram_type"] == 4:
            # Count histogram: everything is expected in the scalar bucket.
            current = pd.Series(res["histogram"], index=map(int, reply["buckets"]))
            expected = pd.Series(index=count_histogram_labels, data=0)
            expected[COUNT_SCALAR_BUCKET] = res["count"]
            assert res["histogram"][bucket_index] == res["count"]
            assert res["sum"] == value["sum"] * res["count"]
            assert (current == expected).all()
            continue

        if metric.startswith("USE_COUNTER2_"):
            # The "0" bucket of a use counter is derived from the matching
            # *CONTENT_DOCUMENTS_DESTROYED support histogram.
            if metric.endswith("_PAGE"):
                support_name = "TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED"
            else:
                support_name = "CONTENT_DOCUMENTS_DESTROYED"
            destroyed = histograms_template[support_name]["sum"]
            value["values"]["0"] = destroyed - value["values"]["1"]

        current = pd.Series(res["histogram"], index=map(int, reply["buckets"]))
        expected = Histogram(metric, value).get_value() * res["count"]
        assert (current == expected).all()
        assert res["sum"] == value["sum"] * res["count"]
def test_categorical_histogram_add():
    """Adding two categorical histograms sums their buckets label by label."""
    other = Histogram("TELEMETRY_TEST_CATEGORICAL", [1, 1, 0, 1])
    added = categorical_hist + other

    expected_buckets = (
        ('CommonLabel', 3),
        ('Label2', 2),
        ('Label3', 0),
        (CATEGORICAL_HISTOGRAM_SPILL_BUCKET_NAME, 1),
    )
    for bucket_name, expected_total in expected_buckets:
        assert added.buckets[bucket_name] == expected_total
def test_histogram_with_revision():
    """Construct a histogram from a raw list payload plus a revision URL.

    There is no assertion: the test passes when construction does not raise.
    """
    # Histogram with revision
    raw_payload = [
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.693147182464599, 0.480453014373779, -1, -1
    ]
    revision_url = "http://hg.mozilla.org/mozilla-central/rev/37ddc5e2eb72"
    Histogram("HTTPCONNMGR_USED_SPECULATIVE_CONN", raw_payload, revision_url)
def _keyed_histogram(self, prefix, channel, version, dates, metric, histograms, expected_count):
    """Check the aggregates served for a keyed histogram metric.

    First asserts that an unfiltered request returns one entry per
    (label, date) pair, then requests each label of ``histograms`` on its own
    and asserts the count, bucket contents and sum of every entry.
    """
    joined_dates = ','.join(dates)

    overview_url = '/aggregates_by/{}/channels/{}/?version={}&dates={}&metric={}'.format(
        prefix, channel, version, joined_dates, metric)
    overview = self.as_json(self.app.get(overview_url))
    self.assertEqual(len(overview['data']), len(histograms) * len(dates))

    for label, value in histograms.iteritems():
        label_url = '/aggregates_by/{}/channels/{}/?version={}&dates={}&metric={}&label={}'.format(
            prefix, channel, version, joined_dates, metric, label)
        labelled = self.as_json(self.app.get(label_url))
        self.assertEqual(len(labelled['data']), len(dates))

        for res in labelled['data']:
            # Same count formula as the non-keyed check: separate terms for
            # the non-aggregated ("old") and aggregated-child ("new") pings.
            num_old_pings = NUM_PINGS_PER_DIMENSIONS - NUM_AGGREGATED_CHILD_PINGS
            old_pings_expected_count = expected_count * num_old_pings / NUM_PINGS_PER_DIMENSIONS
            new_pings_expected_count = expected_count * NUM_AGGREGATED_CHILD_PINGS / NUM_PINGS_PER_DIMENSIONS
            count_from_new = new_pings_expected_count * NUM_PROCESS_TYPES
            count_from_old = old_pings_expected_count * (NUM_CHILDREN_PER_PING + 1)
            self.assertEqual(res['count'], count_from_new + count_from_old)

            current = pd.Series(res['histogram'], index=map(int, labelled['buckets']))
            expected = Histogram(metric, value).get_value() * res['count']
            self.assertTrue((current == expected).all())
            self.assertEqual(res['sum'], value['sum'] * res['count'])
def test_keyed_histogram(prefix, channel, version, dates, metric, histograms, expected_count):
    """Check the aggregates the running service returns for a keyed histogram.

    The unfiltered reply must hold one entry per (label, date) pair; each
    label is then requested on its own and its count, buckets and sum checked.
    """
    joined_dates = ",".join(dates)

    overview_url = "{}/aggregates_by/{}/channels/{}?version={}&dates={}&metric={}".format(
        SERVICE_URI, prefix, channel, version, joined_dates, metric)
    overview = requests.get(overview_url).json()
    assert len(overview["data"]) == len(histograms) * len(dates)

    for label, value in histograms.iteritems():
        label_url = "{}/aggregates_by/{}/channels/{}?version={}&dates={}&metric={}&label={}".format(
            SERVICE_URI, prefix, channel, version, joined_dates, metric, label)
        labelled = requests.get(label_url).json()
        assert len(labelled["data"]) == len(dates)

        for res in labelled["data"]:
            assert res["count"] == expected_count * (NUM_CHILDREN_PER_PING + 1)

            current = pd.Series(res["histogram"], index=map(int, labelled["buckets"]))
            expected = Histogram(metric, value).get_value() * res["count"]
            assert (current == expected).all()
            assert res["sum"] == value["sum"] * res["count"]
def test_startup_histogram():
    """Construct a ``STARTUP_``-prefixed histogram from a raw list payload.

    There is no assertion: the test passes when construction does not raise.
    """
    # Startup histogram
    raw_payload = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.693147182464599, 0.480453014373779, -1, -1]
    Histogram("STARTUP_HTTPCONNMGR_USED_SPECULATIVE_CONN", raw_payload)
def setup_module():
    """Create the fixtures shared by the categorical histogram tests.

    Sets two module globals: ``categorical_hist``, the categorical test
    histogram built from a list, and ``series``, a pandas Series holding the
    same counts indexed by bucket label.
    """
    global categorical_hist, series

    categorical_hist = Histogram("TELEMETRY_TEST_CATEGORICAL", [2, 1, 0, 0])

    bucket_labels = [
        'CommonLabel',
        'Label2',
        'Label3',
        CATEGORICAL_HISTOGRAM_SPILL_BUCKET_NAME,
    ]
    series = pd.Series([2, 1, 0, 0], index=bucket_labels, dtype='int64')
def test_histogram_with_computed_value():
    """Construct the ``GC_REASON_2`` histogram from a raw list payload.

    There is no assertion: the test passes when construction does not raise.
    """
    # Histogram with computed value
    raw_payload = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1, 0, 0, 0, 11, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2517, -1, -1, 116979, 0]
    Histogram("GC_REASON_2", raw_payload)
def test_histogram(prefix, channel, version, dates, metric, value, expected_count):
    """Check the aggregates the running service returns for one histogram.

    Queries ``SERVICE_URI`` for ``metric`` over ``dates`` and asserts the
    count, sum and bucket contents of every returned entry. For
    ``USE_COUNTER2_`` metrics ``value['values']['0']`` is written in place.
    """
    if metric.endswith("CONTENT_DOCUMENTS_DESTROYED"):
        # Ignore USE_COUNTER2_ support histograms
        return

    url = "{}/aggregates_by/{}/channels/{}?version={}&dates={}&metric={}".format(
        SERVICE_URI, prefix, channel, version, ",".join(dates), metric)
    reply = requests.get(url).json()
    assert len(reply["data"]) == len(dates)

    bucket_index = COUNT_HISTOGRAM_LABELS.index(COUNT_SCALAR_BUCKET)

    for res in reply["data"]:
        # From pings before bug 1218576 (old), `count` is the number of processes.
        # From pings after bug 1218576 (new), `count` is the number of process types.
        num_old_pings = NUM_PINGS_PER_DIMENSIONS - NUM_AGGREGATED_CHILD_PINGS
        old_pings_expected_count = expected_count * num_old_pings / NUM_PINGS_PER_DIMENSIONS
        new_pings_expected_count = expected_count * NUM_AGGREGATED_CHILD_PINGS / NUM_PINGS_PER_DIMENSIONS
        count_from_new = new_pings_expected_count * NUM_PROCESS_TYPES
        count_from_old = old_pings_expected_count * (NUM_CHILDREN_PER_PING + 1)
        assert res["count"] == count_from_new + count_from_old

        if value["histogram_type"] == 4:
            # Count histogram: everything is expected in the scalar bucket.
            current = pd.Series(res["histogram"], index=map(int, reply["buckets"]))
            expected = pd.Series(index=COUNT_HISTOGRAM_LABELS, data=0)
            expected[COUNT_SCALAR_BUCKET] = res["count"]
            assert res["histogram"][bucket_index] == res["count"]
            assert res["sum"] == value["sum"] * res["count"]
            assert (current == expected).all()
            continue

        if metric.startswith("USE_COUNTER2_"):
            # The "0" bucket of a use counter is derived from the matching
            # *CONTENT_DOCUMENTS_DESTROYED support histogram.
            if metric.endswith("_PAGE"):
                support_name = "TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED"
            else:
                support_name = "CONTENT_DOCUMENTS_DESTROYED"
            destroyed = histograms_template[support_name]["sum"]
            value["values"]["0"] = destroyed - value["values"]["1"]
            index_type = int
        else:
            # Categorical histograms (type 5) are indexed by string labels.
            index_type = str if value["histogram_type"] == 5 else int

        current = pd.Series(res["histogram"], index=map(index_type, reply["buckets"]))
        expected = Histogram(metric, value).get_value() * res["count"]
        assert (current == expected).all()
        assert res["sum"] == value["sum"] * res["count"]
def get_dates_metrics(prefix, channel):
    """Return the aggregates of one metric for a list of dates, as JSON.

    Reads its inputs from ``request.args``:
      * ``dates``   - comma-separated list of dates (required);
      * ``version`` - version to query (required);
      * ``metric``  - metric name (required);
      * every remaining parameter is forwarded to the query as a filter
        dimension, with the literal strings "true"/"false" turned into
        booleans.

    Returns a JSON string with the keys ``data`` (one entry per result row:
    date, label, histogram, count, sum), ``buckets``, ``kind`` and
    ``description``.

    Aborts with a 404 when a required parameter is missing or empty, when no
    histogram definition is found for the metric, or when the query yields no
    rows.
    """
    mapping = {"true": True, "false": False}
    dimensions = {k: mapping.get(v, v) for k, v in request.args.iteritems()}

    # Get dates.
    # Read the raw string rather than the boolean-mapped value (so that
    # "dates=true" cannot end up calling .split on a bool), and drop empty
    # entries: "".split(",") is [""], which is truthy and used to let a
    # missing `dates` parameter slip past the check below.
    dimensions.pop('dates', None)
    dates = [d for d in request.args.get('dates', "").split(',') if d]
    version = dimensions.pop('version', None)
    metric = dimensions.get('metric')

    if not dates or not version or not metric:
        abort(404)

    # Get bucket labels
    if metric.startswith("SIMPLE_MEASURES_"):
        labels = simple_measures_labels
        kind = "exponential"
        description = ""
    else:
        # Use nightly revision if the channel is unknown
        revision = histogram_revision_map.get(channel, "nightly")

        try:
            definition = Histogram(metric, {"values": {}}, revision=revision)
        except KeyError:
            # Couldn't find the histogram definition
            abort(404)

        kind = definition.kind
        description = definition.definition.description()

        if kind == "count":
            labels = count_histogram_labels
            # Count histograms are queried under a prefixed metric name.
            dimensions["metric"] = "[[COUNT]]_{}".format(metric)
        elif kind == "flag":
            labels = [0, 1]
        else:
            labels = definition.get_value().keys().tolist()

    # Fetch metrics
    result = execute_query(
        "select * from batched_get_metric(%s, %s, %s, %s, %s)",
        (prefix, channel, version, dates, json.dumps(dimensions)))
    if not result:
        abort(404)

    pretty_result = {
        "data": [],
        "buckets": labels,
        "kind": kind,
        "description": description
    }
    for row in result:
        # row[2] holds the bucket values followed by the sum and the count.
        aggregate = row[2]
        pretty_result["data"].append({
            "date": row[0],
            "label": row[1],
            "histogram": aggregate[:-2],
            "count": aggregate[-1],
            "sum": aggregate[-2]
        })

    return json.dumps(pretty_result)
def get_dates_metrics(prefix, channel):
    """Return the aggregates of one metric for a list of dates.

    Reads its inputs from ``request.args``:
      * ``dates``   - comma-separated list of dates (required);
      * ``version`` - version to query (required);
      * ``metric``  - metric name (required);
      * every remaining parameter must be in ``ALLOWED_DIMENSIONS`` and is
        forwarded to the query as a filter dimension ("true"/"false" become
        booleans; ``child`` additionally accepts "content"/"parent").

    Returns a ``Response`` with mimetype ``application/json`` whose body has
    the keys ``data`` (one entry per result row: date, label, histogram,
    count, sum), ``buckets``, ``kind`` and ``description``.

    Raises ``MethodNotAllowed`` when an unsupported query parameter is
    present. Aborts with a 404 when a required parameter is missing or empty,
    when the metric is not allowed, when its scalar or histogram definition
    cannot be found, or when the query yields no rows.
    """
    mapping = {"true": True, "false": False}
    dimensions = {k: mapping.get(v, v) for k, v in request.args.iteritems()}

    extra_dimensions = dimensions.viewkeys() - ALLOWED_DIMENSIONS
    if extra_dimensions:
        # We received an unsupported query string to filter by, return 405.
        valid_url = '{}?{}'.format(
            request.path,
            urlencode({k: v for k, v in dimensions.items() if k in ALLOWED_DIMENSIONS}))
        raise MethodNotAllowed(valid_methods=[valid_url])

    if 'child' in dimensions:
        # Process types in the db are true/false, not content/process
        new_process_map = {"content": True, "parent": False}
        dimensions['child'] = new_process_map.get(dimensions['child'], dimensions['child'])

    # Get dates.
    # Read the raw string rather than the boolean-mapped value (so that
    # "dates=true" cannot end up calling .split on a bool), and drop empty
    # entries: "".split(",") is [""], which is truthy and used to let a
    # missing `dates` parameter slip past the check below.
    dimensions.pop('dates', None)
    dates = [d for d in request.args.get('dates', '').split(',') if d]
    version = dimensions.pop('version', None)
    metric = dimensions.get('metric')

    if not dates or not version or not metric:
        abort(404, description="Missing date or version or metric. All three are required.")

    if not _allow_metric(channel, metric):
        abort(404, description="This metric is not allowed to be served.")

    # Get bucket labels
    for _prefix, _labels in SCALAR_MEASURE_MAP.iteritems():
        if metric.startswith(_prefix) and _prefix != COUNT_HISTOGRAM_PREFIX:
            labels = _labels
            kind = "exponential"
            try:
                description = _get_description(channel, _prefix, metric)
            except MissingScalarError:
                abort(404, description="Cannot find this scalar definition.")
            break
    else:
        # No scalar prefix matched: the metric is a regular histogram.
        revision = histogram_revision_map[channel]
        try:
            definition = Histogram(metric, {"values": {}}, revision=revision)
        except KeyError:
            # Couldn't find the histogram definition
            abort(404, description="Cannot find this histogram definition.")

        kind = definition.kind
        description = definition.definition.description()

        if kind == "count":
            labels = COUNT_HISTOGRAM_LABELS
            # Count histograms are queried under a prefixed metric name.
            dimensions["metric"] = "{}_{}".format(COUNT_HISTOGRAM_PREFIX, metric)
        elif kind == "flag":
            labels = [0, 1]
        else:
            labels = definition.get_value().keys().tolist()

    altered_dimensions = deepcopy(dimensions)
    if 'child' in dimensions:
        # Bug 1339139 - when adding gpu processes, child process went from True/False to "true"/"false"/"gpu"
        reverse_map = {True: 'true', False: 'false'}
        altered_dimensions['child'] = reverse_map.get(altered_dimensions['child'], altered_dimensions['child'])

    # Fetch metrics
    if metric.startswith("USE_COUNTER2_"):
        # Bug 1412382 - Use Counters need to be composed from reported True
        # values and False values supplied by *CONTENT_DOCUMENTS_DESTROYED.
        denominator = "TOP_LEVEL_CONTENT_DOCUMENTS_DESTROYED"
        if metric.endswith("_DOCUMENT"):
            denominator = "CONTENT_DOCUMENTS_DESTROYED"
        denominator = "{}_{}".format(COUNT_HISTOGRAM_PREFIX, denominator)

        denominator_dimensions = deepcopy(dimensions)
        denominator_dimensions["metric"] = denominator
        denominator_new_dimensions = deepcopy(altered_dimensions)
        denominator_new_dimensions["metric"] = denominator

        result = execute_query(
            "select * from batched_get_use_counter(%s, %s, %s, %s, %s, %s, %s, %s)", (
                prefix, channel, version, dates, json.dumps(denominator_dimensions),
                json.dumps(denominator_new_dimensions), json.dumps(dimensions), json.dumps(altered_dimensions)))
    else:
        result = execute_query(
            "select * from batched_get_metric(%s, %s, %s, %s, %s, %s)", (
                prefix, channel, version, dates, json.dumps(dimensions), json.dumps(altered_dimensions)))

    if not result:
        abort(404, description="No data found for this metric.")

    pretty_result = {"data": [], "buckets": labels, "kind": kind, "description": description}
    for row in result:
        # row[2] holds the bucket values followed by the sum and the count.
        aggregate = row[2]
        pretty_result["data"].append({
            "date": row[0],
            "label": row[1],
            "histogram": aggregate[:-2],
            "count": aggregate[-1],
            "sum": aggregate[-2],
        })

    return Response(json.dumps(pretty_result), mimetype="application/json")
def test_histogram_without_revision():
    """Construct ``STARTUP_CRASH_DETECTED`` from a raw list payload.

    There is no assertion: the test passes when construction does not raise.
    """
    # Histogram without revision
    # NOTE(review): a revision URL is still passed as the third argument;
    # confirm what "without revision" refers to.
    raw_payload = [1, 0, 0, 0, -1, -1, 0, 0]
    revision_url = "http://hg.mozilla.org/mozilla-central/rev/da2f28836843"
    Histogram("STARTUP_CRASH_DETECTED", raw_payload, revision_url)
def test_categorical_histogram_dict_value():
    """A categorical histogram given a ``values`` dict matches the ``series`` global."""
    bucket_counts = {u'0': 2, u'1': 1, u'2': 0, u'3': 0}
    from_dict = Histogram('TELEMETRY_TEST_CATEGORICAL', {'values': bucket_counts})
    observed = from_dict.get_value()
    assert all(observed == series)
def test_histogram_without_revision():
    """Construct ``STARTUP_CRASH_DETECTED`` from a raw list payload.

    There is no assertion: the test passes when construction does not raise.
    """
    # Histogram without revision
    # NOTE(review): a revision URL is still passed as the third argument;
    # confirm what "without revision" refers to.
    raw_payload = [1, 0, 0, 0, -1, -1, 0, 0]
    revision_url = "https://hg.mozilla.org/mozilla-central/rev/838652a84b76"
    Histogram("STARTUP_CRASH_DETECTED", raw_payload, revision_url)
def test_malformed_categorical():
    """An empty payload for a categorical histogram yields only zero entries."""
    # See bug 1353196
    empty_payload = {}
    observed = Histogram('TELEMETRY_TEST_CATEGORICAL', empty_payload).get_value()
    assert all(observed == 0)
def test_malformed_non_categorical():
    """An empty payload for a non-categorical histogram yields only zero entries."""
    empty_payload = {}
    observed = Histogram('GC_REASON_2', empty_payload).get_value()
    assert all(observed == 0)
def test_large_categorical_values():
    """A huge bucket key in a categorical payload must leave every entry at zero."""
    oversized = {'values': {u'3.0683611344442837e+257': 1}}
    observed = Histogram('TELEMETRY_TEST_CATEGORICAL', oversized).get_value()
    assert all(observed == 0)
def test_large_values():
    """A huge bucket key in a non-categorical payload must leave every entry at zero."""
    oversized = {'values': {u'3.0683611344442837e+257': 1}}
    observed = Histogram('GC_REASON_2', oversized).get_value()
    assert all(observed == 0)