Example #1
import json
import logging

from tornado import gen

# Note: 'metric' (metrics table wrapper), 'util' (time helpers) and 'DotDict'
# come from the surrounding codebase and are assumed to be importable here.

@gen.engine
def UpdateMetrics(db_client, day_stats, callback, dry_run=True, prefix_to_erase=None, hms_tuple=None):
  """Write 'day_stats' to the metrics table. First lookup any existing metrics and update them.
  'day_stats' is a dictionary of {day_in_iso8601: DotDict}.
  If 'dry_run' is True, don't commit the changes to the metrics table, but perform all the work and log to info.
  If 'prefix_to_erase' is not None, we first replace the passed-in prefix with an empty dotdict.
  If 'hms_tuple' is not None, the timestamp for the metric entry will be with the specified hour/minute/second,
  otherwise, we use noon. To help with consistency, hms_tuple should come from kDailyMetricsTimeByLogType above.

  For example, given the existing metric: { itunes: { downloads: { 'US': 5, 'UK': 3 }, update: { ... }}}
  We can either:
    - Replace the downloads numbers (the entire tree under 'prefix_to_erase' gets replaced):
      UpdateMetrics({'2013-02-01': {'itunes': {'downloads': { 'DE': 3, 'FR': 1 }}}}, prefix_to_erase='itunes.downloads')
      resulting in: { itunes: { downloads: { 'DE': 3, 'FR': 1 }, update: { ... }}}
    - Or update with partial stats (the default behavior: new values are merged into existing ones):
      UpdateMetrics({'2013-02-01': {'itunes': { 'downloads': { 'DE': 3, 'FR': 1 }}}})
      resulting in: { itunes: { downloads: { 'US': 5, 'UK': 3, 'DE': 3, 'FR': 1 }, update: { ... }}}
  """
  if len(day_stats) == 0:
    callback()
    return

  cluster = metric.LOGS_STATS_NAME
  group_key = metric.Metric.EncodeGroupKey(cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))

  # Convert each YYYY-MM-DD day into a UTC timestamp at 'hms_tuple', defaulting to noon.
  h, m, s = hms_tuple if hms_tuple is not None else (12, 0, 0)
  timestamps = [(util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s), day) for day in sorted(day_stats.keys())]
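  # E.g. with the default noon tuple (assuming ISO8601ToUTCTimestamp returns Unix epoch seconds):
  # '2013-02-01' -> 1359720000, i.e. 2013-02-01T12:00:00Z.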

  # Query Metrics table for all metrics between the timestamps we have data for.
  existing_metrics = yield gen.Task(metric.Metric.QueryTimespan, db_client, group_key,
                                    timestamps[0][0], timestamps[-1][0])
  existing_dict = dict((m.timestamp, m) for m in existing_metrics)

  tasks = []
  for t, day in timestamps:
    data = day_stats[day]
    prev_metric = existing_dict.get(t, None)

    payload = json.dumps(data)
    if prev_metric is None:
      logging.info('%s: new metric: %r' % (day, payload))
    else:
      prev_payload = prev_metric.payload
      # We parse the payload twice; it's simpler than making deepcopy work on DotDict.
      prev_data = DotDict(json.loads(prev_payload))
      new_data = DotDict(json.loads(prev_payload))
      if prefix_to_erase is not None:
        # We can't call 'del' on a DotDict's internals, so we simply replace the subtree with an
        # empty DotDict; the update below repopulates it.
        new_data[prefix_to_erase] = DotDict()

      # DotDict doesn't have an update() method.
      flat = new_data.flatten()
      flat.update(data.flatten())
      new_data = DotDict(flat)
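      # E.g. (hypothetical values): {'itunes': {'downloads': {'US': 5}}} flattens to
      # {'itunes.downloads.US': 5}; updating with {'itunes.downloads.DE': 3} and re-nesting
      # yields {'itunes': {'downloads': {'US': 5, 'DE': 3}}}.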

      payload = json.dumps(new_data)
      if new_data.flatten() == prev_data.flatten():
        logging.info('%s: metric has not changed, skipping' % day)
        continue
      else:
        logging.info('%s: changed metric: %s -> %s' % (day, prev_payload, payload))

    if not dry_run:
      new_metric = metric.Metric.Create(group_key, 'logs_daily', t, payload)
      tasks.append(gen.Task(new_metric.Update, db_client))

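  # Yielding a list of gen.Tasks runs all the pending updates in parallel and
  # resumes here once every one of them has completed.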
  yield tasks
  callback()
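
A minimal usage sketch (assumes a Tornado IOLoop and a 'db_client' handle from the
surrounding codebase; the stats values are hypothetical):

from tornado.ioloop import IOLoop

def _OnMetricsUpdated():
  logging.info('metrics update complete')
  IOLoop.instance().stop()

day_stats = {'2013-02-01': DotDict({'itunes': {'downloads': {'DE': 3, 'FR': 1}}})}
UpdateMetrics(db_client, day_stats, _OnMetricsUpdated,
              dry_run=True,  # log the would-be writes without committing them
              prefix_to_erase='itunes.downloads')
IOLoop.instance().start()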