def SerializeMetrics(metrics):
  """Serialize a list of metric records into {metric_name: [(timestamp, value), ...]}.

  Each record's JSON payload is flattened with DotDict; names matching
  kSummedMetrics patterns are additionally rolled up into aggregate names,
  and only names passing the kFilteredMetrics check are kept.
  """
  def _SkipMetric(name):
    # A metric is kept (returns False) only if it matches at least one of the
    # kFilteredMetrics regexes; anything else is skipped.
    for regex in kFilteredMetrics:
      # Bug fix: was re.match(regex, k), which tested the enclosing loop
      # variable instead of the argument, so aggregated names produced by
      # _AggregateMetric were filtered against the wrong string.
      if re.match(regex, name) is not None:
        return False
    return True

  def _AggregateMetric(running_sum, metric_name, value):
    """Given a metric name, determine whether we sum it into a different
    metric name or not. Returns whether the original metric needs to be
    processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        # value is passed explicitly (previously read via closure over the
        # caller's loop variable, which was easy to break silently).
        if not _SkipMetric(res):
          running_sum[res] += value
    return keep

  data = defaultdict(list)
  for m in metrics:
    # Accumulate all values for this record before appending, so summed
    # metrics collapse into a single point per timestamp.
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k, v)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      data[k].append((timestamp, v))
  return data
def _SerializeMetrics(metrics):
  """Serialize metric records into per-name dicts with totals and day-over-day rates.

  Returns {metric_name: {'is_average': bool,
                         'cluster_total': [(timestamp, value), ...],
                         'cluster_rate': [(timestamp, diff_or_None), ...],
                         'description': metric_name}}.
  A None point is inserted to break the graph when a metric goes stale
  (no data for slightly over one day).
  """
  def _SkipMetric(name):
    # kFilteredMetrics entries are (regex, allowed_groups). A metric matching
    # a regex is skipped unless its single captured group is in allowed_groups;
    # metrics matching no regex are kept.
    for regex, allowed_groups in kFilteredMetrics:
      # Bug fix: was re.match(regex, k), which tested the enclosing loop
      # variable instead of the argument, so aggregated names produced by
      # _AggregateMetric were filtered against the wrong string.
      res = re.match(regex, name)
      if res is None:
        continue
      assert len(res.groups()) == 1
      if res.groups()[0] in allowed_groups:
        return False
      else:
        return True
    return False

  def _AggregateMetric(running_sum, metric_name, value):
    """Given a metric name, determine whether we sum it into a different
    metric name or not. Returns whether the original metric needs to be
    processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        # value is passed explicitly (previously read via closure over the
        # caller's loop variable, which was easy to break silently).
        if not _SkipMetric(res):
          running_sum[res] += value
    return keep

  data = defaultdict(dict)
  prev_metrics = {}   # metric name -> (timestamp, value) of last seen point
  seen_vars = set()   # metric names for which we've emitted at least one rate point
  for m in metrics:
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k, v)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      d = data[k]
      if len(d) == 0:
        # First time we see this metric: initialize its series.
        d['is_average'] = False
        d['cluster_total'] = list()
        d['cluster_rate'] = list()
        d['description'] = k
      d['cluster_total'].append((timestamp, v))
      if k in prev_metrics:
        _, prev_v = prev_metrics[k]
        # Since the metrics are written exactly once a day, no need to divide,
        # just use the difference.
        diff = (v - prev_v)
      else:
        diff = v
      if k not in seen_vars:
        # Skip the first data point, we don't know what the previous value is.
        # We can't use prev_metrics since metrics with holes (eg: missing days)
        # get removed.
        d['cluster_rate'].append((timestamp, None))
        seen_vars.add(k)
      else:
        d['cluster_rate'].append((timestamp, diff))
      prev_metrics[k] = (timestamp, v)

    # Look for metrics that haven't been set recently and insert None to break
    # the graph. Since we may have sets of metrics stored at various
    # timestamps, we can't just do this at the next time. Instead, we break
    # the metric if we haven't seen a data point in slightly over one day.
    # NOTE: .items() returns a list in Python 2, so deleting inside the loop
    # is safe here.
    for k, (t, v) in prev_metrics.items():
      if (timestamp - t) > (constants.SECONDS_PER_DAY + constants.SECONDS_PER_HOUR):
        # data[k] can't be empty since we've seen this key before.
        data[k]['cluster_total'].append((timestamp, None))
        data[k]['cluster_rate'].append((timestamp, -v))
        # Remove it so we don't send back lots of data for no reason.
        del prev_metrics[k]
  return data