def _AddMetric(self, metric):
    """Fold one metric sample into the aggregation.

    The sample's machine id and timestamp are recorded, then every counter in
    the flattened payload that also has an entry in self.counter_data receives
    the sample value. Metric samples must be added in chronological order.
    """
    source_machine = metric.machine_id
    sample_time = metric.timestamp
    flat_payload = DotDict(json.loads(metric.payload)).flatten()

    self.machines.add(source_machine)
    self.timestamps.add(sample_time)

    for counter_name in flat_payload:
        if counter_name in self.counter_data:
            value = flat_payload.get(counter_name, None)
            # A null value in the payload carries no sample for this counter.
            if value is not None:
                self.counter_data[counter_name].AddSample(source_machine, sample_time, value)
def ParseContents(contents):
    """Parse tab-separated report contents into a DotDict of unit counts.

    Each non-header line contributes one entry keyed by
    'itunes.<type name>.<version>.<store>' whose value is the integer in
    column 7. A line whose first column is 'Provider' is treated as the header
    and skipped; at most one such line is tolerated.
    """
    parsed = DotDict()
    header_lines = []
    for row in contents.splitlines():
        fields = row.split('\t')
        if fields[0] == 'Provider':
            # Header line: remember it so we can verify there is only one.
            header_lines.append(row)
            continue

        # Dots become underscores because the version ends up inside a DotDict key.
        version = fields[5].replace('.', '_')
        if version in ('', ' '):
            # Subscriptions do not have a version, use 'all'.
            version = 'all'

        product_type = fields[6]
        # Fall back to the raw type id when we have no name for it.
        type_name = itunes_trends_codes.PRODUCT_TYPE_IDENTIFIER.get(product_type, product_type)
        units = int(fields[7])
        # Proceeds are ignored: they do not reflect in-app purchases.
        store = fields[12]
        parsed['itunes.%s.%s.%s' % (type_name, version, store)] = units

    assert len(header_lines) <= 1, 'Skipped too many lines: %r' % header_lines
    return parsed
def RegisterRun(self, status, callback, stats=None, failure_msg=None):
    """Write the metric entry summarizing this run, then invoke 'callback'.

    The payload holds 'start_time' (set by Start()), 'end_time' (now) and
    'status'. When 'stats' is given it must be a DotDict and is stored under
    the 'stats' key. 'failure_msg' is stored under 'failure_msg' only when the
    status is STATUS_FAILURE. The start time is cleared afterwards so that a
    run cannot be registered twice.
    """
    assert status in [None, Job.STATUS_SUCCESS, Job.STATUS_FAILURE], 'Unknown status: %s' % status
    assert self._start_time is not None, 'Writing job summary, but Start never called.'

    finished_at = int(time.time())
    summary = DotDict()
    summary['start_time'] = self._start_time
    summary['end_time'] = finished_at
    summary['status'] = status
    if stats is not None:
        assert isinstance(stats, DotDict), 'Stats is not a DotDict: %r' % stats
        summary['stats'] = stats
    if status == Job.STATUS_FAILURE and failure_msg is not None:
        summary['failure_msg'] = failure_msg

    cluster = metric.JOBS_STATS_NAME
    interval = metric.Metric.FindIntervalForCluster(cluster, 'daily')
    group_key = metric.Metric.EncodeGroupKey(cluster, interval)
    run_metric = metric.Metric.Create(group_key, self._name, self._start_time, json.dumps(summary))
    yield gen.Task(run_metric.Update, self._client)

    # Clearing the start time makes a second RegisterRun for the same run fail its assertion.
    self._start_time = None
    callback()
def _Start(callback):
    """Run the 'analyze_analytics' job once and record the outcome.

    When options.options.require_lock is set, the job lock is acquired first
    and the function exits early if that fails. A failing RunOnce is registered
    as a failed run with its traceback; a non-None result outside dry-run mode
    is registered as a successful run with the result stored as 'last_day'.
    ReleaseLock is always called before 'callback'.
    """
    client = db_client.DBClient.Instance()
    job = Job(client, 'analyze_analytics')

    if options.options.require_lock:
        lock_acquired = yield gen.Task(job.AcquireLock)
        if lock_acquired == False:
            logging.warning('Failed to acquire job lock: exiting.')
            callback()
            return

    last_day = None
    job.Start()
    try:
        last_day = yield gen.Task(RunOnce, client, job)
    except:
        # Failure: register the run together with the formatted stack trace.
        trace = traceback.format_exc()
        logging.info('Registering failed run with message: %s' % trace)
        yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=trace)
    else:
        if last_day is not None and not options.options.dry_run:
            # Data was processed and we are not in dry-run mode: write the run summary.
            run_stats = DotDict()
            run_stats['last_day'] = last_day
            logging.info('Registering successful run with stats: %r' % run_stats)
            yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=run_stats)
    finally:
        yield gen.Task(job.ReleaseLock)

    callback()
def _GetMetric(self, day, h=12, m=0, s=0):
    """Return the payload stored for 'day' at h:m:s as a DotDict, or None.

    The query covers the single timestamp derived from 'day' and the given
    hour/minute/second; the first matching metric's payload is decoded.
    """
    when = util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s)
    matches = self._RunAsync(metric.Metric.QueryTimespan, self._client, self._group_key, when, when)
    if len(matches) > 0:
        return DotDict(json.loads(matches[0].payload))
    return None
def ToDotDict(self):
    """Return this object's four request counts as a DotDict under 'user_requests'."""
    entries = (
        ('user_requests.all', self._active_all),
        ('user_requests.post', self._active_post),
        ('user_requests.share', self._active_share),
        ('user_requests.view', self._active_view),
    )
    result = DotDict()
    for key, value in entries:
        result[key] = value
    return result
def _OnQueryMetric(min_metrics, max_metrics, metrics):
    """Validate queried metrics, then stop the test's wait loop.

    Checks that the number of metrics lies in [min_metrics, max_metrics], that
    every timestamp is a multiple of 3, and that the first metric's payload
    has a record for every flattened counter name. 'self' comes from the
    enclosing scope.
    """
    found = len(metrics)
    self.assertTrue(min_metrics <= found <= max_metrics,
                    '%d not in [%d-%d]' % (found, min_metrics, max_metrics))
    for sample in metrics:
        self.assertTrue(sample.timestamp % 3 == 0)

    first_payload = DotDict(json.loads(metrics[0].payload))
    for counter_name in counters.counters.flatten().keys():
        self.assertTrue(counter_name in first_payload,
                        'Payload did not contain record for counter %s' % counter_name)
    self.stop()
def _SerializeMetrics(metrics, metric_name):
    """Build a (column headers, rows) table for one metric subtree.

    For each metric whose payload contains 'metric_name', the flattened values
    under it are summed per column and per day. When 'metric_name' appears in
    kMetricSignificantLevel, only the dot-separated component at that index is
    used as the column name. kSortByCount (first regexp matching 'metric_name')
    decides whether columns are ordered by total and whether totals are shown
    in the headers. Rows are returned in reverse input order; cells that are
    not greater than zero are rendered as ''.
    """
    has_level = metric_name in kMetricSignificantLevel
    column_totals = Counter()
    rows = []
    for entry in metrics:
        row = defaultdict(int)
        row['day'] = util.TimestampUTCToISO8601(entry.timestamp).replace('-', '/')
        tree = DotDict(json.loads(entry.payload))
        if metric_name not in tree:
            continue
        for name, count in tree[metric_name].flatten().iteritems():
            if has_level:
                name = name.split('.')[kMetricSignificantLevel[metric_name]]
            column_totals[name] += count
            column_totals['Total'] += count
            row[name] += count
            row['Total'] += count
        rows.append(row)

    # Display parameters come from the first kSortByCount entry matching the metric name.
    sort_by_count, show_total = False, False
    for regexp, sort_flag, show_flag in kSortByCount:
        if re.match(regexp, metric_name):
            sort_by_count, show_total = sort_flag, show_flag
            break

    if sort_by_count:
        ordered = column_totals.most_common()
    else:
        ordered = sorted(column_totals.iteritems())
    # 'Total' always gets its own fixed column right after the day.
    visible = [(name, total) for name, total in ordered if name != 'Total']

    if show_total:
        header = ['Day', 'Total %d' % column_totals['Total']]
        header.extend('%s %d' % pair for pair in visible)
    else:
        header = ['Day', 'Total']
        header.extend(name for name, _ in visible)

    table = []
    for row in reversed(rows):
        line = [row['day'], row['Total']]
        line.extend(row[name] if row[name] > 0 else '' for name, _ in visible)
        table.append(line)
    return (header, table)
def SerializeMetrics(metrics):
    """Convert metrics into {metric name: [(timestamp, value), ...]}.

    Every metric payload is flattened. A name rewritten by one of the
    kSummedMetrics (compiled regex, replacement) pairs is summed into the
    rewritten name instead of being reported under its own name. Only names
    matching at least one kFilteredMetrics regex are kept.

    Returns a defaultdict(list) with one (timestamp, value) point per metric
    in which the name had a value, in input order.
    """
    def _SkipMetric(name):
        """Return True unless 'name' matches one of the kFilteredMetrics regexes."""
        # The name being tested must be the argument: the aggregated name passed in
        # by _AggregateMetric differs from the enclosing loop's current key.
        for regex in kFilteredMetrics:
            if re.match(regex, name) is not None:
                return False
        return True

    def _AggregateMetric(running_sum, metric_name, value):
        """Add 'value' to every aggregate that 'metric_name' is summed into.

        Returns whether the original metric still needs to be processed under
        its own name, i.e. whether no kSummedMetrics rule rewrote it.
        """
        keep = True
        for regex, replacement in kSummedMetrics:
            summed_name = regex.sub(replacement, metric_name)
            if summed_name != metric_name:
                keep = False
                if not _SkipMetric(summed_name):
                    running_sum[summed_name] += value
        return keep

    data = defaultdict(list)
    for m in metrics:
        running_sum = Counter()
        timestamp = m.timestamp
        payload = DotDict(json.loads(m.payload)).flatten()
        for k, v in payload.iteritems():
            keep_original = _AggregateMetric(running_sum, k, v)
            if keep_original and not _SkipMetric(k):
                running_sum[k] += v
        for k, v in running_sum.iteritems():
            data[k].append((timestamp, v))
    return data
def FindPreviousRuns(self, callback, start_timestamp=None, status=None, limit=None):
    """Look for previous runs of this job in the metrics table.

    Args:
      callback: invoked with the list of matching run payloads (DotDicts),
        sorted by 'start_time'.
      start_timestamp: only consider metrics from this time on; when None,
        search the last week.
      status: when given, only return runs that finished with this status,
        otherwise return runs regardless of status.
      limit: when not None, return only the latest 'limit' runs. A limit of
        zero or less yields an empty list.
    """
    assert status in [None, Job.STATUS_SUCCESS, Job.STATUS_FAILURE], 'Unknown status: %s' % status

    runs = []
    cluster = metric.JOBS_STATS_NAME
    # TODO(marc): there is no guarantee that jobs will run daily (could be more or less). It shouldn't matter
    # except when accessing the data using counters.
    group_key = metric.Metric.EncodeGroupKey(cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))
    if start_timestamp is not None:
        start_time = start_timestamp
    else:
        start_time = time.time() - constants.SECONDS_PER_WEEK

    # Search for metrics from start_time to now.
    existing_metrics = yield gen.Task(metric.Metric.QueryTimespan, self._client, group_key, start_time, None)
    for m in existing_metrics:
        if m.machine_id != self._name:
            # Not for this job.
            continue

        # Parse and validate payload.
        payload = DotDict(json.loads(m.payload))
        assert 'start_time' in payload and 'status' in payload, 'Malformed payload: %r' % payload
        assert payload['start_time'] == m.timestamp, 'Payload start_time does not match metric timestamp'
        if status is not None and payload['status'] != status:
            continue
        runs.append(payload)

    # Sort by timestamp, although the runs should already be in order.
    runs.sort(key=lambda payload: payload['start_time'])

    if limit is None:
        callback(runs)
    elif limit <= 0:
        # runs[-0:] is the whole list and a negative limit would slice from the
        # front, so handle "no runs requested" explicitly.
        callback([])
    else:
        callback(runs[-limit:])
def _Start(callback):
    """Run the 'analyze_dynamodb' job once and record the outcome.

    Table read/write units are first divided by options.options.throttling_factor
    (never going below 1). The job aborts when --limit_users is set outside
    dry-run mode. When options.options.require_lock is set, the job lock is
    acquired first and the function exits early if that fails. A failing
    RunOnce is registered as a failed run with its traceback; a non-None result
    outside dry-run mode is registered as a successful run with the result
    stored as 'last_day'. ReleaseLock is always called before 'callback'.
    """
    # Setup throttling.
    factor = options.options.throttling_factor
    for table in vf_schema.SCHEMA.GetTables():
        table.read_units = max(1, table.read_units // factor)
        table.write_units = max(1, table.write_units // factor)

    client = db_client.DBClient.Instance()
    job = Job(client, 'analyze_dynamodb')

    if options.options.limit_users > 0 and not options.options.dry_run:
        logging.error('--limit_users specified, but not running in dry-run mode. Aborting')
        callback()
        return

    if options.options.require_lock:
        lock_acquired = yield gen.Task(job.AcquireLock)
        if lock_acquired == False:
            logging.warning('Failed to acquire job lock: exiting.')
            callback()
            return

    last_day = None
    job.Start()
    try:
        last_day = yield gen.Task(RunOnce, client, job)
    except:
        # Failure: register the run together with the formatted stack trace.
        trace = traceback.format_exc()
        logging.info('Registering failed run with message: %s' % trace)
        yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=trace)
    else:
        if last_day is not None and not options.options.dry_run:
            # Data was processed and we are not in dry-run mode: write the run summary.
            run_stats = DotDict()
            run_stats['last_day'] = last_day
            logging.info('Registering successful run with stats: %r' % run_stats)
            yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=run_stats)
    finally:
        yield gen.Task(job.ReleaseLock)

    callback()
def RunOnce(client, callback):
    """Record today's per-table count and size under the 'db.table' prefix.

    The schema is verified (verify_only=True) to obtain (name, properties)
    results; each table's 'count' and 'size_bytes' are written through
    logs_util.UpdateMetrics, replacing the whole 'db.table' prefix of any
    existing metric for today. Honors options.options.dry_run.
    """
    today = util.NowUTCToISO8601()
    logging.info('getting table sizes for %s' % today)
    results = yield gen.Task(vf_schema.SCHEMA.VerifyOrCreate, client, verify_only=True)

    table_stats = DotDict()
    for entry in sorted(results):
        table_name, props = entry[0], entry[1]
        table_stats['db.table.count.%s' % table_name] = props.count
        table_stats['db.table.size.%s' % table_name] = props.size_bytes

    # The entire 'db.table' prefix of previous metrics gets replaced.
    write_time = logs_util.kDailyMetricsTimeByLogType['dynamodb_stats']
    yield gen.Task(logs_util.UpdateMetrics,
                   client, {today: table_stats},
                   prefix_to_erase='db.table',
                   dry_run=options.options.dry_run,
                   hms_tuple=write_time)
    callback()
def _Start(callback):
    """Run the 'server_log_metrics' job once and record the outcome.

    An email manager is installed first (SendGrid when options.options.send_email
    is set, logging-only otherwise). When options.options.require_lock is set,
    the job lock is acquired and the function exits early if that fails. A
    failing RunOnce is registered as a failed run with its traceback. A
    successful run is only registered when a result was returned, we are not in
    dry-run mode, and all three compute_* options are enabled (a "full run").
    ReleaseLock is always called before 'callback'.
    """
    # On a devbox, the SendGrid manager prompts for the passphrase; skip it when not sending email.
    manager = SendGridEmailManager() if options.options.send_email else LoggingEmailManager()
    EmailManager.SetInstance(manager)

    client = db_client.DBClient.Instance()
    job = Job(client, 'server_log_metrics')

    if options.options.require_lock:
        lock_acquired = yield gen.Task(job.AcquireLock)
        if lock_acquired == False:
            logging.warning('Failed to acquire job lock: exiting.')
            callback()
            return

    is_full_run = all((options.options.compute_user_requests,
                       options.options.compute_registration_delay,
                       options.options.compute_app_versions))

    last_day = None
    job.Start()
    try:
        last_day = yield gen.Task(RunOnce, client, job)
    except:
        # Failure: register the run together with the formatted stack trace.
        trace = traceback.format_exc()
        logging.info('Registering failed run with message: %s' % trace)
        yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=trace)
    else:
        if last_day is not None and not options.options.dry_run and is_full_run:
            # Full run with data processed and not in dry-run mode: write the run summary.
            run_stats = DotDict()
            run_stats['last_day'] = last_day
            logging.info('Registering successful run with stats: %r' % run_stats)
            yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=run_stats)
    finally:
        yield gen.Task(job.ReleaseLock)

    callback()
def _Start(callback):
    """Run the 'itunes_trends' job once and record the outcome.

    Requires the 'user' and 'vendor_id' options and a secret named
    'itunes_connect_<user>'. When options.options.require_lock is set, the job
    lock is acquired first and the function exits early if that fails. A
    failing RunOnce is registered as a failed run with its traceback; a
    non-None result outside dry-run mode is registered as a successful run with
    the result stored as 'last_day'. ReleaseLock is always called before
    'callback'.
    """
    assert options.options.user is not None and options.options.vendor_id is not None
    # NOTE(review): this format string has no '%s' placeholder (it looks like a scrubbed
    # e-mail address), so the '%' below raises TypeError for a plain string user.
    # Confirm the intended address template.
    apple_id = '*****@*****.**' % options.options.user

    # The iTunes Connect password is looked up from secrets.
    password = secrets.GetSecret('itunes_connect_%s' % options.options.user)
    assert password

    client = db_client.DBClient.Instance()
    job = Job(client, 'itunes_trends')

    if options.options.require_lock:
        lock_acquired = yield gen.Task(job.AcquireLock)
        if lock_acquired == False:
            logging.warning('Failed to acquire job lock: exiting.')
            callback()
            return

    last_day = None
    job.Start()
    try:
        last_day = yield gen.Task(RunOnce, client, job, apple_id, password)
    except:
        # Failure: register the run together with the formatted stack trace.
        trace = traceback.format_exc()
        logging.info('Registering failed run with message: %s' % trace)
        yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=trace)
    else:
        if last_day is not None and not options.options.dry_run:
            # Data was processed and we are not in dry-run mode: write the run summary.
            run_stats = DotDict()
            run_stats['last_day'] = last_day
            logging.info('Registering successful run with stats: %r' % run_stats)
            yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=run_stats)
    finally:
        yield gen.Task(job.ReleaseLock)

    callback()
def UpdateMetrics(db_client, day_stats, callback, dry_run=True, prefix_to_erase=None, hms_tuple=None):
    """Write 'day_stats' to the metrics table, merging with any existing metrics.

    'day_stats' is a dictionary of {day_in_iso8601: DotDict}.
    If 'dry_run' is True, all the work is performed and logged at info level,
    but nothing is committed to the metrics table.
    If 'prefix_to_erase' is not None, that prefix of an existing metric is
    replaced with an empty DotDict before the new stats are merged in.
    If 'hms_tuple' is not None, each metric entry is written at the given
    (hour, minute, second) of its day; otherwise noon is used. To help with
    consistency, hms_tuple should come from kDailyMetricsTimeByLogType.
    Days whose merged payload equals the existing one are skipped.

    For example, given the existing metric:
      { itunes: { downloads: { 'US': 5, 'UK': 3 }, update: { ... }}}
    We can either:
    - Replace the downloads numbers (the entire tree under 'prefix_to_erase' gets replaced):
      UpdateMetrics({'2013-02-01': {'itunes': {'downloads': { 'DE': 3, 'FR': 1 }}}},
                    prefix_to_erase='itunes.downloads')
      resulting in:
      { itunes: { downloads: { 'DE': 3, 'FR': 1 }, update: { ... }}}
    - Or update with partial stats (no 'prefix_to_erase'):
      UpdateMetrics({'2013-02-01': {'itunes': { 'downloads': { 'DE': 3, 'FR': 1 }}}})
      resulting in:
      { itunes: { downloads: { 'US': 5, 'UK': 3, 'DE': 3, 'FR': 1 }, update: { ... }}}
    """
    if len(day_stats) == 0:
        callback()
        return

    cluster = metric.LOGS_STATS_NAME
    interval = metric.Metric.FindIntervalForCluster(cluster, 'daily')
    group_key = metric.Metric.EncodeGroupKey(cluster, interval)

    # Convert each YYYY-MM-DD into a UTC timestamp at the requested time of day (noon by default).
    if hms_tuple is not None:
        hour, minute, second = hms_tuple
    else:
        hour, minute, second = (12, 0, 0)
    stamped_days = []
    for iso_day in sorted(day_stats):
        stamp = util.ISO8601ToUTCTimestamp(iso_day, hour=hour, minute=minute, second=second)
        stamped_days.append((stamp, iso_day))

    # Query the metrics table for everything between the first and last timestamps we have data for.
    found = yield gen.Task(metric.Metric.QueryTimespan, db_client, group_key,
                           stamped_days[0][0], stamped_days[-1][0])
    existing_by_time = {}
    for existing in found:
        existing_by_time[existing.timestamp] = existing

    pending_writes = []
    for stamp, iso_day in stamped_days:
        new_stats = day_stats[iso_day]
        previous = existing_by_time.get(stamp, None)
        payload = json.dumps(new_stats)
        if previous is None:
            logging.info('%s: new metric: %r' % (iso_day, payload))
        else:
            old_payload = previous.payload
            # Decode twice; it's simpler than making deepcopy work on DotDict.
            old_data = DotDict(json.loads(old_payload))
            merged = DotDict(json.loads(old_payload))
            if prefix_to_erase is not None:
                # We can't 'del' inside a DotDict, so swap in an empty one that gets repopulated below.
                merged[prefix_to_erase] = DotDict()

            # DotDict has no update() method: merge the flattened forms instead.
            flat_merged = merged.flatten()
            flat_merged.update(new_stats.flatten())
            merged = DotDict(flat_merged)
            payload = json.dumps(merged)

            if merged.flatten() == old_data.flatten():
                logging.info('%s: metric has not changed, skipping' % iso_day)
                continue
            logging.info('%s: changed metric: %s -> %s' % (iso_day, old_payload, payload))

        if not dry_run:
            updated = metric.Metric.Create(group_key, 'logs_daily', stamp, payload)
            pending_writes.append(gen.Task(updated.Update, db_client))

    yield pending_writes
    callback()
def testUpdateMetrics(self):
    """Exercise logs_util.UpdateMetrics: dry-run, merge, prefix erase and custom write time."""
    def _AssertStored(day, expected, **hms_kwargs):
        # Compare flattened forms of the stored metric and the expectation.
        stored = self._GetMetric(day, **hms_kwargs)
        self.assertTrue(stored.flatten() == expected.flatten())

    def _AssertAll(expectations, **hms_kwargs):
        for day, expected in expectations:
            _AssertStored(day, expected, **hms_kwargs)

    def _Update(stats, **kwargs):
        self._RunAsync(logs_util.UpdateMetrics, self._client, stats, **kwargs)

    def _WriteOriginals():
        self._WriteMetric('2013-01-01', stats_1)
        self._WriteMetric('2013-01-02', stats_2)
        self._WriteMetric('2013-01-03', stats_3)

    # Write some basic metrics.
    stats_1 = DotDict({'itunes': {'downloads': {'US': 1, 'UK': 2, 'FR': 3}}})
    stats_2 = DotDict({'itunes': {'downloads': {'US': 5, 'DE': 6}}})
    stats_3 = DotDict({'itunes': {'updates': {'US': 4, 'UK': 5, 'FR': 6}}})
    _WriteOriginals()

    # Expected merge results used below.
    day2_merged = DotDict({'itunes': {'downloads': {'US': 1, 'UK': 2, 'FR': 3, 'DE': 6}}})
    day2_replaced = DotDict({'itunes': {'downloads': {'US': 1, 'UK': 2, 'FR': 3}}})
    day3_combined = DotDict({'itunes': {'downloads': {'US': 5, 'DE': 6},
                                        'updates': {'US': 4, 'UK': 5, 'FR': 6}}})

    # Dict of new stats.
    new_stats = {
        '2013-01-01': stats_1,  # No changes.
        '2013-01-02': stats_1,  # Changed.
        '2013-01-03': stats_2,  # Changed, but in a different prefix.
        '2013-01-04': stats_2,  # New metric.
    }

    # Dry-run only, with and without a prefix to erase: nothing may change.
    untouched = (('2013-01-01', stats_1), ('2013-01-02', stats_2), ('2013-01-03', stats_3))
    for extra in ({}, {'prefix_to_erase': 'itunes'}):
        _Update(new_stats, dry_run=True, **extra)
        _AssertAll(untouched)
        self.assertIsNone(self._GetMetric('2013-01-04'))

    # Update only, don't erase previous metrics.
    _Update(new_stats, dry_run=False)
    _AssertAll((
        ('2013-01-01', stats_1),        # Unchanged.
        ('2013-01-02', day2_merged),    # Gains UK and FR from stats_1, keeps its own DE, changes US.
        ('2013-01-03', day3_combined),  # Keeps its own prefix and gains stats_2 under 'downloads'.
        ('2013-01-04', stats_2),        # Brand new.
    ))

    # Rewrite metrics. 2013-01-04 will still be filled.
    _WriteOriginals()

    # Update and erase a given prefix on previous metrics.
    _Update(new_stats, dry_run=False, prefix_to_erase='itunes.downloads')
    after_erase = (
        ('2013-01-01', stats_1),        # Unchanged.
        ('2013-01-02', day2_replaced),  # Gains UK and FR, new value for US, drops DE.
        ('2013-01-03', day3_combined),  # Keeps its own prefix and gains stats_2 under 'downloads'.
        ('2013-01-04', stats_2),        # Brand new.
    )
    _AssertAll(after_erase)

    # Now write metrics at a custom timestamp. By default, they are written at noon.
    self.assertIsNone(self._GetMetric('2013-01-01', h=12, m=1))
    new_stats2 = {
        '2013-01-01': stats_3,
        '2013-01-02': stats_2,
        '2013-01-03': stats_1,
        '2013-01-04': stats_1,
    }
    hms = logs_util.kDailyMetricsTimeByLogType['active_users']
    _Update(new_stats2, dry_run=False, hms_tuple=hms)
    _AssertAll((
        ('2013-01-01', stats_3),
        ('2013-01-02', stats_2),
        ('2013-01-03', stats_1),
        ('2013-01-04', stats_1),
    ), h=12, m=1)

    # Make sure the stats previously written at noon haven't changed.
    _AssertAll(after_erase)
def ProcessTables(client, callback):
    """Scan the user table and hand per-user statistics to 'callback' as a DotDict.

    Terminated and prospective (unregistered) users are only counted by state.
    For registered users we additionally count locale, identities, device
    versions/notifications and account settings. When options.options.limit_users
    is positive, scanning stops once that many users have been fetched.
    All counters are stored under the 'dynamodb.user' prefix.
    """
    by_state = Counter()
    by_locale = Counter()
    by_identity_count = Counter()
    by_identity_types = Counter()
    by_highest_version = Counter()
    by_has_notification = Counter()
    by_notification_count = Counter()
    by_email_alerts = Counter()
    by_sms_alerts = Counter()
    by_push_alerts = Counter()
    by_storage = Counter()
    by_marketing = Counter()

    remaining = options.options.limit_users if options.options.limit_users > 0 else None
    next_key = None
    while True:
        users, next_key = yield gen.Task(User.Scan, client, None, limit=remaining, excl_start_key=next_key)

        for user in users:
            if user.IsTerminated():
                # This includes terminated prospective users (pretty rare).
                by_state['terminated'] += 1
                continue
            if not user.IsRegistered():
                by_state['prospective'] += 1
                continue

            # From here on out, only registered users are part of the stats.
            by_state['registered'] += 1

            # User locale.
            by_locale[user.locale or 'NONE'] += 1

            # Count of identities by type.
            counts, types = yield gen.Task(CountByIdentity, client, user.user_id)
            by_identity_count[counts] += 1
            by_identity_types[types] += 1

            # Versions and notification status for user's devices.
            highest_version, notification_count = yield gen.Task(ProcessUserDevices, client, user.user_id)
            if highest_version:
                version_key = highest_version.replace('.', '_')
            else:
                version_key = 'None'
            by_highest_version[version_key] += 1
            by_notification_count[str(notification_count)] += 1
            by_has_notification['true' if notification_count > 0 else 'false'] += 1

            # Account settings.
            settings = yield gen.Task(AccountSettings.QueryByUser, client, user.user_id, None)
            by_email_alerts[settings.email_alerts or 'NA'] += 1
            by_sms_alerts[settings.sms_alerts or 'NA'] += 1
            by_push_alerts[settings.push_alerts or 'NA'] += 1
            if settings.storage_options:
                storage_key = ','.join(settings.storage_options)
            else:
                storage_key = 'NA'
            by_storage[storage_key] += 1
            by_marketing[settings.marketing or 'NA'] += 1

        if remaining is not None:
            remaining -= len(users)
            if remaining <= 0:
                break

        if next_key is None:
            break

    day_stats = DotDict()
    for key, counter in (
            ('dynamodb.user.state', by_state),
            ('dynamodb.user.locale', by_locale),
            ('dynamodb.user.identities', by_identity_count),
            ('dynamodb.user.identity_types', by_identity_types),
            ('dynamodb.user.device_highest_version', by_highest_version),
            ('dynamodb.user.device_has_notification', by_has_notification),
            ('dynamodb.user.devices_with_notification', by_notification_count),
            ('dynamodb.user.settings_email_alerts', by_email_alerts),
            ('dynamodb.user.settings_sms_alerts', by_sms_alerts),
            ('dynamodb.user.settings_push_alerts', by_push_alerts),
            ('dynamodb.user.settings_storage', by_storage),
            ('dynamodb.user.settings_marketing', by_marketing)):
        day_stats[key] = counter
    callback(day_stats)
def _SerializeMetrics(metrics):
    """Convert metrics into per-name series of totals and day-over-day differences.

    Every metric payload is flattened. A name rewritten by one of the
    kSummedMetrics (compiled regex, replacement) pairs is summed into the
    rewritten name instead of being reported under its own name, and names
    rejected by kFilteredMetrics are dropped.

    Returns a defaultdict mapping each name to a dict with:
      'is_average': always False,
      'description': the name,
      'cluster_total': list of (timestamp, value),
      'cluster_rate': list of (timestamp, difference with the previous value);
        the first point of each name is (timestamp, None).
    A name that has not been seen for slightly over one day gets a None total
    (to break the graph) and is forgotten as "previous value".
    """
    def _SkipMetric(name):
        """Return True if 'name' must be left out of the output.

        The first kFilteredMetrics regex matching 'name' decides: the name is
        kept only when the regex's single captured group is in the allowed
        groups. Names matching no regex are kept.
        """
        # The name being tested must be the argument: the aggregated name passed in
        # by _AggregateMetric differs from the enclosing loop's current key.
        for regex, allowed_groups in kFilteredMetrics:
            res = re.match(regex, name)
            if res is None:
                continue
            assert len(res.groups()) == 1
            return res.groups()[0] not in allowed_groups
        return False

    def _AggregateMetric(running_sum, metric_name, value):
        """Add 'value' to every aggregate that 'metric_name' is summed into.

        Returns whether the original metric still needs to be processed under
        its own name, i.e. whether no kSummedMetrics rule rewrote it.
        """
        keep = True
        for regex, replacement in kSummedMetrics:
            summed_name = regex.sub(replacement, metric_name)
            if summed_name != metric_name:
                keep = False
                if not _SkipMetric(summed_name):
                    running_sum[summed_name] += value
        return keep

    data = defaultdict(dict)
    prev_metrics = {}
    seen_vars = set()
    # A metric is considered stale after slightly over one day without a data point.
    stale_after = constants.SECONDS_PER_DAY + constants.SECONDS_PER_HOUR

    for m in metrics:
        running_sum = Counter()
        timestamp = m.timestamp
        payload = DotDict(json.loads(m.payload)).flatten()
        for k, v in payload.iteritems():
            keep_original = _AggregateMetric(running_sum, k, v)
            if keep_original and not _SkipMetric(k):
                running_sum[k] += v

        for k, v in running_sum.iteritems():
            d = data[k]
            if len(d) == 0:
                d['is_average'] = False
                d['cluster_total'] = list()
                d['cluster_rate'] = list()
                d['description'] = k
            d['cluster_total'].append((timestamp, v))

            if k in prev_metrics:
                _, prev_v = prev_metrics[k]
                # Since the metrics are written exactly once a day, no need to divide, just use the difference.
                diff = (v - prev_v)
            else:
                diff = v

            if k not in seen_vars:
                # Skip the first data point, we don't know what the previous value is.
                # We can't use prev_metrics since metrics with holes (eg: missing days) get removed.
                d['cluster_rate'].append((timestamp, None))
                seen_vars.add(k)
            else:
                d['cluster_rate'].append((timestamp, diff))
            prev_metrics[k] = (timestamp, v)

        # Look for metrics that haven't been set recently and insert None to break the graph.
        # Since we may have sets of metrics stored at various timestamps, we can't just do this at the
        # next time. Iterate over a snapshot because entries are deleted inside the loop.
        for name, (last_seen, last_value) in list(prev_metrics.items()):
            if (timestamp - last_seen) > stale_after:
                # data[name] can't be empty since we've seen this key before.
                data[name]['cluster_total'].append((timestamp, None))
                data[name]['cluster_rate'].append((timestamp, -last_value))
                # Remove it so we don't send back lots of data for no reason.
                del prev_metrics[name]
    return data
def ProcessFiles(merged_store, filenames, callback):
    """Fetch and process each file in 'filenames'; pass {day: DotDict} to 'callback'.

    Filenames are expected to have exactly three '/'-separated components, the
    second being the day. Files for today are skipped unless
    options.options.compute_today is set. Days without any long scan speed
    sample produce no entry.
    """
    @gen.engine
    def _ProcessOneFile(contents, day_stats):
        """Read one processed file (one JSON entry per line) and update 'day_stats'."""
        stream = cStringIO.StringIO(contents)
        stream.seek(0)
        ui_counts = Counter()
        for raw_line in iter(stream.readline, ''):
            entry = json.loads(raw_line)
            if not entry or 'version' not in entry:
                continue
            # TODO(marc): lookup the user's device ID in dynamodb and get device model.
            payload = entry['payload']
            if 'name' not in payload:
                continue
            event_name = payload['name']
            if event_name == '/assets/scan' and payload['type'] == 'full':
                day_stats.AddScan(entry['version'], payload['num_scanned'], payload['elapsed'])
            elif event_name.startswith('/ui/'):
                ui_counts[event_name] += 1
        if ui_counts:
            # Catch-all entry for files with at least one UI event.
            ui_counts['/ui/anything'] += 1
        day_stats.AddEvents(ui_counts)
        stream.close()

    today = util.NowUTCToISO8601()

    # Group filenames by day.
    day_to_files = defaultdict(list)
    for path in filenames:
        _, file_day, _unused_user = path.split('/')
        if options.options.compute_today or today != file_day:
            day_to_files[file_day].append(path)

    # Compute per-day totals. Days don't need to be processed in order, but it's nicer.
    percentiles = [1, 5, 10, 25, 50, 75, 90, 95, 99]
    stats_by_day = {}
    for day in sorted(day_to_files):
        day_stats = DayStats(day)
        for path in day_to_files[day]:
            contents = ''
            try:
                contents = yield gen.Task(merged_store.Get, path)
            except Exception as e:
                logging.error('Error fetching file %s: %r' % (path, e))
                continue
            _ProcessOneFile(contents, day_stats)

        if len(day_stats._long_scan_speeds) == 0:
            continue

        summary = DotDict()
        for pct in percentiles:
            summary['user_analytics.scans_gt1s_speed_percentile.%.2d' % pct] = day_stats.LongScanSpeedPercentile(pct)
            summary['user_analytics.scans_duration_percentile.%.2d' % pct] = day_stats.ScanDurationPercentile(pct)
            summary['user_analytics.scans_num_photos_percentile.%.2d' % pct] = day_stats.PhotosScannedPercentile(pct)
        summary['user_analytics.ui.event_users'] = day_stats.event_users
        summary['user_analytics.ui.total_events'] = day_stats.total_events
        stats_by_day[day] = summary

    callback(stats_by_day)
def testMetrics(self):
    """Test fetching/writing metrics."""
    def _Runs(**kwargs):
        return self._RunAsync(job.FindPreviousRuns, **kwargs)

    def _LastSuccess(**kwargs):
        return self._RunAsync(job.FindLastSuccess, **kwargs)

    def _AssertStartTimes(runs, expected_start_times):
        self.assertEqual(len(runs), len(expected_start_times))
        for run, expected in zip(runs, expected_start_times):
            self.assertEqual(run['start_time'], expected)

    # Job being tested.
    job = Job(self._client, 'test_job')
    self.assertEqual(len(_Runs()), 0)

    # Unrelated job with a different name. Run entries should not show up under 'test_job'.
    other_job = Job(self._client, 'other_test_job')
    for other_status in (Job.STATUS_SUCCESS, Job.STATUS_FAILURE):
        other_job.Start()
        self._RunAsync(other_job.RegisterRun, other_status)

    # Calling RegisterRun without first calling Start fails because the start_time is not set.
    self.assertIsNone(job._start_time)
    self.assertRaises(AssertionError, self._RunAsync, job.RegisterRun, Job.STATUS_SUCCESS)

    job.Start()
    self.assertIsNotNone(job._start_time)
    # Overwrite it for easier testing.
    start_time = int(time.time() - (constants.SECONDS_PER_WEEK + constants.SECONDS_PER_HOUR))
    job._start_time = start_time

    # Write run summary with extra stats.
    stats = DotDict()
    stats['foo.bar'] = 5
    stats['baz'] = 'test'
    self._RunAsync(job.RegisterRun, Job.STATUS_SUCCESS, stats=stats, failure_msg='foo')
    # start_time is reset to prevent multiple calls to RegisterRun.
    self.assertIsNone(job._start_time)
    self.assertRaises(AssertionError, self._RunAsync, job.RegisterRun, Job.STATUS_SUCCESS)

    end_time = int(time.time())
    # Default search is "runs started in the past week".
    self.assertEqual(len(_Runs()), 0)

    # With an explicit start timestamp and no status filter, the run is found.
    since_first = start_time - 10
    found = _Runs(start_timestamp=since_first)
    self.assertEqual(len(found), 1)
    first_run = found[0]
    self.assertEqual(first_run['start_time'], start_time)
    self.assertAlmostEqual(first_run['end_time'], end_time, delta=10)
    self.assertEqual(first_run['status'], Job.STATUS_SUCCESS)
    self.assertEqual(first_run['stats.foo.bar'], 5)
    self.assertEqual(first_run['stats.baz'], 'test')
    # failure_msg does nothing when status is SUCCESS.
    self.assertTrue('failure_msg' not in first_run)

    # Search for failed runs.
    self.assertEqual(len(_Runs(start_timestamp=since_first, status=Job.STATUS_FAILURE)), 0)

    # Create a failed job summary.
    job.Start()
    start_time2 = int(time.time() - constants.SECONDS_PER_HOUR)
    job._start_time = start_time2
    self._RunAsync(job.RegisterRun, Job.STATUS_FAILURE, failure_msg='stack trace')

    # Find previous runs using a variety of filters.
    _AssertStartTimes(_Runs(start_timestamp=since_first, status=Job.STATUS_SUCCESS), [start_time])

    failed = _Runs(start_timestamp=since_first, status=Job.STATUS_FAILURE)
    self.assertEqual(len(failed), 1)
    self.assertEqual(failed[0]['status'], Job.STATUS_FAILURE)
    self.assertEqual(failed[0]['failure_msg'], 'stack trace')
    self.assertEqual(failed[0]['start_time'], start_time2)

    _AssertStartTimes(_Runs(start_timestamp=since_first), [start_time, start_time2])
    _AssertStartTimes(_Runs(start_timestamp=(start_time2 - 10)), [start_time2])
    _AssertStartTimes(_Runs(start_timestamp=since_first, limit=1), [start_time2])

    # Find last successful run with optional payload key/value.
    for extra in ({},
                  {'with_payload_key': 'stats.baz'}):
        last = _LastSuccess(start_timestamp=since_first, **extra)
        self.assertIsNotNone(last)
        self.assertEqual(last['stats.foo.bar'], 5)

    self.assertIsNone(_LastSuccess(start_timestamp=since_first, with_payload_key='stats.bar'))

    last = _LastSuccess(start_timestamp=since_first,
                        with_payload_key='stats.baz', with_payload_value='test')
    self.assertIsNotNone(last)
    self.assertEqual(last['stats.foo.bar'], 5)

    self.assertIsNone(_LastSuccess(start_timestamp=since_first,
                                   with_payload_key='stats.baz', with_payload_value='test2'))