@gen.engine
def UpdateMetrics(db_client, day_stats, callback, dry_run=True, prefix_to_erase=None, hms_tuple=None):
  """Write 'day_stats' to the metrics table. First look up any existing metrics and update them.

  'day_stats' is a dictionary of {day_in_iso8601: DotDict}.
  If 'dry_run' is True, don't commit the changes to the metrics table, but perform all the work
  and log it at the info level.
  If 'prefix_to_erase' is not None, we first replace the passed-in prefix with an empty DotDict.
  If 'hms_tuple' is not None, the timestamp for the metric entry uses the specified
  hour/minute/second; otherwise we use noon. To help with consistency, hms_tuple should come
  from kDailyMetricsTimeByLogType above.

  For example, given the existing metric:
    { itunes: { downloads: { 'US': 5, 'UK': 3 }, update: { ... }}}
  we can either:
  - Replace the download numbers (the entire tree under 'prefix_to_erase' gets replaced):
      UpdateMetrics({'2013-02-01': {'itunes': {'downloads': { 'DE': 3, 'FR': 1 }}}},
                    prefix_to_erase='itunes.downloads')
    resulting in:
      { itunes: { downloads: { 'DE': 3, 'FR': 1 }, update: { ... }}}
  - Or update with partial stats (no prefix erased, so existing entries are merged):
      UpdateMetrics({'2013-02-01': {'itunes': {'downloads': { 'DE': 3, 'FR': 1 }}}})
    resulting in:
      { itunes: { downloads: { 'US': 5, 'UK': 3, 'DE': 3, 'FR': 1 }, update: { ... }}}
  """
  if len(day_stats) == 0:
    callback()
    return

  cluster = metric.LOGS_STATS_NAME
  group_key = metric.Metric.EncodeGroupKey(cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))

  # Convert YYYY-MM-DD into a UTC timestamp at the requested time of day (noon by default).
  h, m, s = hms_tuple if hms_tuple is not None else (12, 0, 0)
  timestamps = [(util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s), day)
                for day in sorted(day_stats.keys())]

  # Query the Metrics table for all metrics in the timespan we have data for.
  existing_metrics = yield gen.Task(metric.Metric.QueryTimespan, db_client, group_key,
                                    timestamps[0][0], timestamps[-1][0])
  existing_dict = dict((m.timestamp, m) for m in existing_metrics)

  tasks = []
  for t, day in timestamps:
    data = day_stats[day]
    prev_metric = existing_dict.get(t, None)
    payload = json.dumps(data)
    if prev_metric is None:
      logging.info('%s: new metric: %r' % (day, payload))
    else:
      prev_payload = prev_metric.payload
      # Parse the previous payload twice; it's simpler than making deepcopy work on DotDict.
      prev_data = DotDict(json.loads(prev_payload))
      new_data = DotDict(json.loads(prev_payload))
      if prefix_to_erase is not None:
        # We can't call 'del' on a DotDict's internals, so replace the subtree with an empty
        # DotDict; it gets repopulated below.
        new_data[prefix_to_erase] = DotDict()

      # DotDict doesn't have an update() method, so merge the flattened representations.
      flat = new_data.flatten()
      flat.update(data.flatten())
      new_data = DotDict(flat)

      payload = json.dumps(new_data)
      if new_data.flatten() == prev_data.flatten():
        logging.info('%s: metric has not changed, skipping' % day)
        continue
      else:
        logging.info('%s: changed metric: %s -> %s' % (day, prev_payload, payload))

    if not dry_run:
      new_metric = metric.Metric.Create(group_key, 'logs_daily', t, payload)
      tasks.append(gen.Task(new_metric.Update, db_client))

  yield tasks
  callback()
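
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of driving UpdateMetrics from another gen.engine coroutine.
# The _ExampleUpdateDailyStats name, the sample day_stats payload, and the choice
# of dates are hypothetical; real callers should pass an hms_tuple taken from
# kDailyMetricsTimeByLogType for consistent per-log-type timestamps.
@gen.engine
def _ExampleUpdateDailyStats(db_client, callback):
  # Replace the per-country iTunes download counts for 2013-02-01; everything
  # under 'itunes.downloads' in the existing metric is erased first.
  day_stats = {'2013-02-01': DotDict({'itunes': {'downloads': {'DE': 3, 'FR': 1}}})}
  yield gen.Task(UpdateMetrics, db_client, day_stats,
                 dry_run=False, prefix_to_erase='itunes.downloads')
  callback()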