Example #1
import json
import re
from collections import Counter, defaultdict

# Python 2 code (note the dict.iteritems() calls). DotDict, kFilteredMetrics and
# kSummedMetrics are defined elsewhere in the module this function comes from.
def SerializeMetrics(metrics):
  def _SkipMetric(name):
    # Keep a metric only if its name matches one of the kFilteredMetrics regexps.
    for regex in kFilteredMetrics:
      res = re.match(regex, name)
      if res is not None:
        return False
    return True

  def _AggregateMetric(running_sum, metric_name):
    """Sum this metric into a different metric name if a kSummedMetrics rule rewrites it.
    Returns whether the original metric still needs to be processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        if not _SkipMetric(res):
          # v is the metric value from the enclosing payload loop.
          running_sum[res] += v
    return keep

  data = defaultdict(list)
  # prev_metrics and seen_vars are unused in this variant; the fuller _SerializeMetrics
  # in Example #2 uses them to compute rates and detect stale metrics.
  prev_metrics = {}
  seen_vars = set()
  for m in metrics:
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      data[k].append((timestamp, v))

  return data
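
A minimal usage sketch (Python 2, to match the iteritems() calls above). Every name defined below, kSummedMetrics, kFilteredMetrics, DotDict and the fake metric records, is a hypothetical stand-in for the project's real definitions, shown only to illustrate how per-country counters get folded into one aggregated series:

import collections
import json
import re

# Hypothetical filter/aggregation rules, purely for illustration.
kSummedMetrics = [(re.compile(r'itunes\.downloads\.(.*)'), 'itunes.downloads')]
kFilteredMetrics = [r'itunes\.']  # only metrics matching one of these regexps are kept

class DotDict(dict):
  # Stand-in for the project's DotDict; the payloads here are already flat.
  def flatten(self):
    return dict(self)

FakeMetric = collections.namedtuple('FakeMetric', ['timestamp', 'payload'])

metrics = [
  FakeMetric(1000, json.dumps({'itunes.downloads.US': 3, 'itunes.downloads.FR': 2})),
  FakeMetric(2000, json.dumps({'itunes.downloads.US': 5})),
]

data = SerializeMetrics(metrics)
# Both per-country counters are folded into 'itunes.downloads':
# data['itunes.downloads'] == [(1000, 5), (2000, 5)]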
Example #2
# Requires the same imports as Example #1; DotDict, constants, kFilteredMetrics and
# kSummedMetrics are defined elsewhere in its module.
def _SerializeMetrics(metrics):
  def _SkipMetric(name):
    # kFilteredMetrics holds (regex, allowed_groups) pairs: skip the metric when a regex
    # matches its name but the captured group is not in the allowed set.
    for regex, allowed_groups in kFilteredMetrics:
      res = re.match(regex, name)
      if res is None:
        continue
      assert len(res.groups()) == 1
      if res.groups()[0] in allowed_groups:
        return False
      else:
        return True
    return False

  def _AggregateMetric(running_sum, metric_name):
    """Sum this metric into a different metric name if a kSummedMetrics rule rewrites it.
    Returns whether the original metric still needs to be processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        if not _SkipMetric(res):
          # v is the metric value from the enclosing payload loop.
          running_sum[res] += v
    return keep

  data = defaultdict(dict)
  prev_metrics = {}
  seen_vars = set()
  for m in metrics:
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      d = data[k]
      if len(d) == 0:
        d['is_average'] = False
        d['cluster_total'] = list()
        d['cluster_rate'] = list()
        d['description'] = k
      d['cluster_total'].append((timestamp, v))
      if k in prev_metrics:
        _, prev_v = prev_metrics[k]
        # Metrics are written exactly once a day, so there is no need to divide; just use the difference.
        diff = (v - prev_v)
      else:
        diff = v
      if k not in seen_vars:
        # Skip the first data point since we don't know the previous value.
        # We can't rely on prev_metrics here because metrics with holes (e.g. missing days) get removed from it.
        d['cluster_rate'].append((timestamp, None))
        seen_vars.add(k)
      else:
        d['cluster_rate'].append((timestamp, diff))
      prev_metrics[k] = (timestamp, v)
    # Look for metrics that haven't been updated recently and insert None to break the graph.
    # Since different sets of metrics may be stored at different timestamps, we can't simply do
    # this at the next timestamp. Instead, break a metric if we haven't seen a data point for it
    # in slightly over one day.
    # .items() returns a list in Python 2, so deleting entries inside the loop is safe.
    for k, (t, v) in prev_metrics.items():
      if (timestamp - t) > (constants.SECONDS_PER_DAY + constants.SECONDS_PER_HOUR):
        # data[k] can't be empty since we've seen this key before.
        data[k]['cluster_total'].append((timestamp, None))
        data[k]['cluster_rate'].append((timestamp, -v))
        # Remove it so we don't send back lots of data for no reason.
        del prev_metrics[k]

  return data
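
A similar sketch for _SerializeMetrics, again with hypothetical stand-ins for kSummedMetrics, kFilteredMetrics, DotDict and constants. It shows the per-metric cluster_total and cluster_rate series and the staleness break inserted when a metric stops reporting for just over a day:

import collections
import json

# Hypothetical stand-ins for the module-level names used by _SerializeMetrics.
kSummedMetrics = []    # no renaming rules in this sketch
kFilteredMetrics = []  # no filters, so _SkipMetric() keeps everything

constants = collections.namedtuple(
    'Constants', ['SECONDS_PER_DAY', 'SECONDS_PER_HOUR'])(86400, 3600)

class DotDict(dict):
  # Stand-in for the project's DotDict; the payloads here are already flat.
  def flatten(self):
    return dict(self)

FakeMetric = collections.namedtuple('FakeMetric', ['timestamp', 'payload'])

day = constants.SECONDS_PER_DAY
metrics = [
  FakeMetric(0 * day, json.dumps({'active_users': 100, 'errors': 1})),
  FakeMetric(1 * day, json.dumps({'active_users': 110, 'errors': 2})),
  FakeMetric(2 * day, json.dumps({'active_users': 120})),  # 'errors' stops reporting
  FakeMetric(3 * day, json.dumps({'active_users': 130})),
]

data = _SerializeMetrics(metrics)
# data['active_users']['cluster_total'] -> [(0, 100), (86400, 110), (172800, 120), (259200, 130)]
# data['active_users']['cluster_rate']  -> [(0, None), (86400, 10), (172800, 10), (259200, 10)]
# 'errors' gets a staleness break once no sample arrives for just over a day:
# data['errors']['cluster_total']       -> [(0, 1), (86400, 2), (259200, None)]
# data['errors']['cluster_rate']        -> [(0, None), (86400, 1), (259200, -2)]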