Example #1
  def _AddMetric(self, metric):
    """Adds a single metric sample to the aggregation.  Metric samples must be added in
    chronological order.
    """
    machine = metric.machine_id
    time = metric.timestamp
    payload = DotDict(json.loads(metric.payload)).flatten()

    self.machines.add(machine)
    self.timestamps.add(time)
    for k in payload:
      if k not in self.counter_data:
        continue
      val = payload.get(k, None)
      if val is not None:
        self.counter_data[k].AddSample(machine, time, val)
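# Illustration only (not from the original module): DotDict(...).flatten() above is assumed to
# turn the nested JSON payload into a flat dict keyed by dotted paths, which is what makes the
# per-counter lookups possible. A minimal stand-in showing that shape:
def _flatten_example(d, prefix=''):
  """Flatten nested dicts into {'a.b.c': value} form, mirroring the assumed DotDict.flatten()."""
  out = {}
  for key, value in d.items():
    path = '%s.%s' % (prefix, key) if prefix else key
    if isinstance(value, dict):
      out.update(_flatten_example(value, path))
    else:
      out[path] = value
  return out

# _flatten_example({'itunes': {'downloads': {'US': 5}}}) == {'itunes.downloads.US': 5}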
def SerializeMetrics(metrics):
  def _SkipMetric(name):
    for regex in kFilteredMetrics:
      res = re.match(regex, name)
      if res is not None:
        return False
    return True

  def _AggregateMetric(running_sum, metric_name):
    """Given a metric name, determine whether we sum it into a different metric name.
    Returns whether the original metric still needs to be processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        if not _SkipMetric(res):
          # 'v' is the value for 'metric_name' in the enclosing payload loop.
          running_sum[res] += v
    return keep

  data = defaultdict(list)
  for m in metrics:
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      data[k].append((timestamp, v))

  return data
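# Hypothetical configuration sketch (the names match the references above, but the values are
# invented for illustration): kSummedMetrics rolls per-group counters up into an aggregate name
# via regex substitution, and kFilteredMetrics whitelists the metric names worth serializing.
import re

kSummedMetrics = [(re.compile(r'itunes\.downloads\..*'), 'itunes.downloads.total')]
kFilteredMetrics = [r'itunes\.']

# With that configuration, SerializeMetrics(metrics) would return something like:
#   {'itunes.downloads.total': [(1359676800, 8), (1359763200, 11)]}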
Example #3
def _SerializeMetrics(metrics):
  def _SkipMetric(name):
    for regex, allowed_groups in kFilteredMetrics:
      res = re.match(regex, name)
      if res is None:
        continue
      assert len(res.groups()) == 1
      if res.groups()[0] in allowed_groups:
        return False
      else:
        return True
    return False

  def _AggregateMetric(running_sum, metric_name):
    """Given a metric name, determine whether we sum it into a different metric name.
    Returns whether the original metric still needs to be processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        if not _SkipMetric(res):
          # 'v' is the value for 'metric_name' in the enclosing payload loop.
          running_sum[res] += v
    return keep

  data = defaultdict(dict)
  prev_metrics = {}
  seen_vars = set()
  for m in metrics:
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      d = data[k]
      if len(d) == 0:
        d['is_average'] = False
        d['cluster_total'] = list()
        d['cluster_rate'] = list()
        d['description'] = k
      d['cluster_total'].append((timestamp, v))
      if k in prev_metrics:
        _, prev_v = prev_metrics[k]
        # Since the metrics are written exactly once a day, there's no need to divide; just use the difference.
        diff = (v - prev_v)
      else:
        diff = v
      if k not in seen_vars:
        # Skip the first data point; we don't know what the previous value is.
        # We can't use prev_metrics since metrics with holes (e.g. missing days) get removed.
        d['cluster_rate'].append((timestamp, None))
        seen_vars.add(k)
      else:
        d['cluster_rate'].append((timestamp, diff))
      prev_metrics[k] = (timestamp, v)
    # Look for metrics that haven't been set recently and insert None to break the graph.
    # Since we may have sets of metrics stored at various timestamps, we can't simply do this at the
    # next timestamp. Instead, we break the metric if we haven't seen a data point in slightly over one day.
    for k, (t, v) in prev_metrics.items():
      if (timestamp - t) > (constants.SECONDS_PER_DAY + constants.SECONDS_PER_HOUR):
        # data[k] can't be empty since we've seen this key before.
        data[k]['cluster_total'].append((timestamp, None))
        data[k]['cluster_rate'].append((timestamp, -v))
        # Remove it so we don't send back lots of data for no reason.
        del prev_metrics[k]

  return data
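# Shape of the dictionary _SerializeMetrics returns, inferred from the code above. The field
# names are the ones assigned in the function; the timestamps and counts are made-up placeholders.
example_output = {
  'itunes.downloads.total': {
    'is_average': False,
    'description': 'itunes.downloads.total',
    'cluster_total': [(1359676800, 8), (1359763200, 11)],
    # The first rate sample is None because there is no previous value to diff against.
    'cluster_rate': [(1359676800, None), (1359763200, 3)],
  },
}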
Example #4
# Callback-style Tornado coroutine: gen.engine drives the 'yield gen.Task(...)' calls below.
@gen.engine
def UpdateMetrics(db_client, day_stats, callback, dry_run=True, prefix_to_erase=None, hms_tuple=None):
  """Write 'day_stats' to the metrics table. First look up any existing metrics and update them.
  'day_stats' is a dictionary of {day_in_iso8601: DotDict}.
  If 'dry_run' is True, don't commit the changes to the metrics table, but perform all the work and log to info.
  If 'prefix_to_erase' is not None, we first replace the passed-in prefix with an empty DotDict.
  If 'hms_tuple' is not None, the timestamp for the metric entry uses the specified hour/minute/second;
  otherwise, we use noon. To help with consistency, hms_tuple should come from kDailyMetricsTimeByLogType above.

  For example, given the existing metric: { itunes: { downloads: { 'US': 5, 'UK': 3 }, update: { ... }}}
  We can either:
    - Replace the downloads numbers: (the entire tree under 'prefix_to_erase' gets replaced)
      UpdateMetrics({'2013-02-01': {'itunes': {'downloads': { 'DE': 3, 'FR': 1 }}}}, prefix_to_erase='itunes.downloads')
      resulting in: { itunes: { downloads: { 'DE': 3, 'FR': 1 }, update: { ... }}}
    - Or we can update with partial stats (leave 'prefix_to_erase' unset):
      UpdateMetrics({'2013-02-01': {'itunes': { 'downloads': { 'DE': 3, 'FR': 1 }}}})
      resulting in: { itunes: { downloads: { 'US': 5, 'UK': 3, 'DE': 3, 'FR': 1 }, update: { ... }}}
  """
  if len(day_stats) == 0:
    callback()
    return

  cluster = metric.LOGS_STATS_NAME
  group_key = metric.Metric.EncodeGroupKey(cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))

  # Convert each YYYY-MM-DD into a UTC timestamp at the requested time of day (noon by default).
  h, m, s = hms_tuple if hms_tuple is not None else (12, 0, 0)
  timestamps = [(util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s), day) for day in sorted(day_stats.keys())]

  # Query Metrics table for all metrics between the timestamps we have data for.
  existing_metrics = yield gen.Task(metric.Metric.QueryTimespan, db_client, group_key,
                                    timestamps[0][0], timestamps[-1][0])
  existing_dict = dict((m.timestamp, m) for m in existing_metrics)

  tasks = []
  for t, day in timestamps:
    data = day_stats[day]
    prev_metric = existing_dict.get(t, None)

    payload = json.dumps(data)
    if prev_metric is None:
      logging.info('%s: new metric: %r' % (day, payload))
    else:
      prev_payload = prev_metric.payload
      # We parse the payload twice; it's simpler than making deepcopy work on DotDict.
      prev_data = DotDict(json.loads(prev_payload))
      new_data = DotDict(json.loads(prev_payload))
      if prefix_to_erase is not None:
        # We can't call 'del' on a DotDict's internals, so replace the subtree with an empty DotDict; it will be repopulated below.
        new_data[prefix_to_erase] = DotDict()

      # DotDict doesn't have an update() method, so merge through the flattened dicts.
      flat = new_data.flatten()
      flat.update(data.flatten())
      new_data = DotDict(flat)

      payload = json.dumps(new_data)
      if new_data.flatten() == prev_data.flatten():
        logging.info('%s: metric has not changed, skipping' % day)
        continue
      else:
        logging.info('%s: changed metric: %s -> %s' % (day, prev_payload, payload))

    if not dry_run:
      new_metric = metric.Metric.Create(group_key, 'logs_daily', t, payload)
      tasks.append(gen.Task(new_metric.Update, db_client))

  yield tasks
  callback()
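# A minimal sketch of the merge UpdateMetrics performs, using plain flattened dicts in place of
# DotDict (the helper below is hypothetical and not part of the original module):
def _merge_flat(prev_flat, new_flat, prefix_to_erase=None):
  """Return prev_flat updated with new_flat, optionally dropping keys under prefix_to_erase first."""
  merged = dict(prev_flat)
  if prefix_to_erase is not None:
    merged = dict((k, v) for k, v in merged.items() if not k.startswith(prefix_to_erase + '.'))
  merged.update(new_flat)
  return merged

# _merge_flat({'itunes.downloads.US': 5, 'itunes.downloads.UK': 3},
#             {'itunes.downloads.DE': 3, 'itunes.downloads.FR': 1},
#             prefix_to_erase='itunes.downloads')
# == {'itunes.downloads.DE': 3, 'itunes.downloads.FR': 1}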