Example 1
  def _AddMetric(self, metric):
    """Adds a single metric sample to the aggregation.  Metric samples must be added in
    chronological order.
    """
    machine = metric.machine_id
    time = metric.timestamp
    payload = DotDict(json.loads(metric.payload)).flatten()

    self.machines.add(machine)
    self.timestamps.add(time)
    for k in payload:
      if k not in self.counter_data:
        continue
      val = payload.get(k, None)
      if val is not None:
        self.counter_data[k].AddSample(machine, time, val)
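Most of the examples below lean on DotDict: construction from a nested dict, assignment through dotted keys, and flatten() to produce a flat {'a.b.c': value} mapping. The real class is not part of this listing; the helper below is only a toy sketch of the assumed flattening behavior (the name _flatten is hypothetical):

# Illustrative sketch only: approximates the nested-to-flat mapping that DotDict.flatten() is assumed to perform.
def _flatten(nested, prefix=''):
  flat = {}
  for key, value in nested.items():
    path = '%s.%s' % (prefix, key) if prefix else key
    if isinstance(value, dict):
      # Recurse into nested dicts, extending the dotted path.
      flat.update(_flatten(value, path))
    else:
      flat[path] = value
  return flat

print(_flatten({'itunes': {'downloads': {'US': 5, 'UK': 3}}}))
# -> {'itunes.downloads.US': 5, 'itunes.downloads.UK': 3} (key order may vary)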
Example 2
 def ParseContents(contents):
     result = DotDict()
     skipped_lines = []
     for line in contents.splitlines():
         tokens = line.split('\t')
         if tokens[0] == 'Provider':
             # Skip header line.
             skipped_lines.append(line)
             continue
         # Replace dots with underscores as we'll be using the version in a DotDict.
         version = tokens[5].replace('.', '_')
         if not version or version == ' ':
             # subscriptions do not have a version, use 'all'.
             version = 'all'
         type_id = tokens[6]
         # Use the type id if we don't have a name for it.
         type_name = itunes_trends_codes.PRODUCT_TYPE_IDENTIFIER.get(
             type_id, type_id)
         units = int(tokens[7])
         # Ignore proceeds, it does not reflect in-app purchases.
         store = tokens[12]
         result['itunes.%s.%s.%s' % (type_name, version, store)] = units
     assert len(skipped_lines) <= 1, 'Skipped too many lines: %r' % skipped_lines
     return result
Example 3
    def RegisterRun(self, status, callback, stats=None, failure_msg=None):
        """Write the metric entry for this run. The start_time is set in Start(). end_time is now.
    If stats is not None, the DotDict is added to the metrics payload with the prefix 'stats'.
    If failure_msg is not None and status==STATUS_FAILURE, write the message in payload.failure_msg.
    """
        assert status in [None, Job.STATUS_SUCCESS,
                          Job.STATUS_FAILURE], 'Unknown status: %s' % status
        assert self._start_time is not None, 'Writing job summary, but Start never called.'
        end_time = int(time.time())
        payload = DotDict()
        payload['start_time'] = self._start_time
        payload['end_time'] = end_time
        payload['status'] = status
        if stats is not None:
            assert isinstance(stats,
                              DotDict), 'Stats is not a DotDict: %r' % stats
            payload['stats'] = stats
        if failure_msg is not None and status == Job.STATUS_FAILURE:
            payload['failure_msg'] = failure_msg

        cluster = metric.JOBS_STATS_NAME
        group_key = metric.Metric.EncodeGroupKey(
            cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))
        new_metric = metric.Metric.Create(group_key,
                                          self._name, self._start_time,
                                          json.dumps(payload))
        yield gen.Task(new_metric.Update, self._client)

        # Clear start time; RegisterRun should not be called more than once for a single run.
        self._start_time = None

        callback()
Example 4
def _Start(callback):
  """Grab a lock on job:analyze_analytics and call RunOnce. If we get a return value, write it to the job summary."""
  client = db_client.DBClient.Instance()
  job = Job(client, 'analyze_analytics')

  if options.options.require_lock:
    got_lock = yield gen.Task(job.AcquireLock)
    if got_lock == False:
      logging.warning('Failed to acquire job lock: exiting.')
      callback()
      return

  result = None
  job.Start()
  try:
    result = yield gen.Task(RunOnce, client, job)
  except:
    # Failure: log run summary with trace.
    typ, val, tb = sys.exc_info()
    msg = ''.join(traceback.format_exception(typ, val, tb))
    logging.info('Registering failed run with message: %s' % msg)
    yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=msg)
  else:
    if result is not None and not options.options.dry_run:
      # Successful run with data processed and not in dry-run mode: write run summary.
      stats = DotDict()
      stats['last_day'] = result
      logging.info('Registering successful run with stats: %r' % stats)
      yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=stats)
  finally:
    yield gen.Task(job.ReleaseLock)

  callback()
Example 5
 def _GetMetric(self, day, h=12, m=0, s=0):
     timestamp = util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s)
     existing_metrics = self._RunAsync(metric.Metric.QueryTimespan,
                                       self._client, self._group_key,
                                       timestamp, timestamp)
     if len(existing_metrics) == 0:
         return None
     return DotDict(json.loads(existing_metrics[0].payload))
Example 6
 def ToDotDict(self):
   """Returns the full data contained in this object in the form of a dotdict."""
   dt = DotDict()
   dt['user_requests.all'] = self._active_all
   dt['user_requests.post'] = self._active_post
   dt['user_requests.share'] = self._active_share
   dt['user_requests.view'] = self._active_view
   return dt
Example 7
 def _OnQueryMetric(min_metrics, max_metrics, metrics):
   self.assertTrue(len(metrics) >= min_metrics and len(metrics) <= max_metrics,
                   '%d not in [%d-%d]' % (len(metrics), min_metrics, max_metrics))
   for m in metrics:
     self.assertTrue(m.timestamp % 3 == 0)
   payload = DotDict(json.loads(metrics[0].payload))
   keys = counters.counters.flatten().keys()
   for k in keys:
     self.assertTrue(k in payload, 'Payload did not contain record for counter %s' % k)
   self.stop()
Example 8
def _SerializeMetrics(metrics, metric_name):
    def _DisplayParams():
        for regexp, sort, show in kSortByCount:
            if re.match(regexp, metric_name):
                return (sort, show)
        return (False, False)

    columns = Counter()
    data = []
    for m in metrics:
        timestamp = m.timestamp
        d = defaultdict(int)
        d['day'] = util.TimestampUTCToISO8601(timestamp).replace('-', '/')

        dd = DotDict(json.loads(m.payload))
        if metric_name not in dd:
            continue
        payload = dd[metric_name].flatten()
        for k, v in payload.iteritems():
            if metric_name in kMetricSignificantLevel:
                k = k.split('.')[kMetricSignificantLevel[metric_name]]
            columns[k] += v
            d[k] += v
            d['Total'] += v
            columns['Total'] += v
        data.append(d)

    # We now have "columns" with totals for each column. We need to sort everything.
    sort_by_count, show_total = _DisplayParams()
    if sort_by_count:
        sorted_cols = columns.most_common()
    else:
        sorted_cols = sorted([(k, v) for k, v in columns.iteritems()])
    cols = ['Day']
    cols.append('Total %d' % columns['Total'] if show_total else 'Total')
    for k, v in sorted_cols:
        if k == 'Total':
            continue
        cols.append('%s %d' % (k, v) if show_total else k)

    sorted_data = []
    for d in reversed(data):
        s = [d['day'], d['Total']]
        for k, _ in sorted_cols:
            if k == 'Total':
                continue
            s.append(d[k] if d[k] > 0 else '')
        sorted_data.append(s)

    return (cols, sorted_data)
Example 9
def SerializeMetrics(metrics):
    def _SkipMetric(name):
        for regex in kFilteredMetrics:
            res = re.match(regex, name)
            if res is not None:
                return False
        return True

    def _AggregateMetric(running_sum, metric_name):
        """Given a metric name, determine whether we sum it into a different metric name or not.
    Returns whether the original metric needs to be processed.
    """
        keep = True
        for regex, replacement in kSummedMetrics:
            res = regex.sub(replacement, metric_name)
            if res != metric_name:
                keep = False
                if not _SkipMetric(res):
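                    # 'v' comes from the enclosing 'for k, v in payload.iteritems()' loop (read via closure).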
                    running_sum[res] += v
        return keep

    data = defaultdict(list)
    prev_metrics = {}
    seen_vars = set()
    for m in metrics:
        running_sum = Counter()
        timestamp = m.timestamp
        payload = DotDict(json.loads(m.payload)).flatten()
        for k, v in payload.iteritems():
            keep_original = _AggregateMetric(running_sum, k)
            if keep_original and not _SkipMetric(k):
                running_sum[k] += v
        for k, v in running_sum.iteritems():
            data[k].append((timestamp, v))

    return data
Example 10
def SerializeMetrics(metrics):
  def _SkipMetric(name):
    for regex in kFilteredMetrics:
      res = re.match(regex, name)
      if res is not None:
        return False
    return True

  def _AggregateMetric(running_sum, metric_name):
    """Given a metric name, determine whether we sum it into a different metric name or not.
    Returns whether the original metric needs to be processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        if not _SkipMetric(res):
          running_sum[res] += v
    return keep

  data = defaultdict(list)
  prev_metrics = {}
  seen_vars = set()
  for m in metrics:
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      data[k].append((timestamp, v))

  return data
Example 11
    def FindPreviousRuns(self,
                         callback,
                         start_timestamp=None,
                         status=None,
                         limit=None):
        """Look for previous runs of this job in the metrics table. Return all found runs regardless of status.
    If start_timestamp is None, search for jobs started in the last week.
    If status is specified, only return runs that finished with this status, otherwise return all runs.
    If limit is not None, return only the latest 'limit' runs, otherwise return all runs.
    Runs are sorted by timestamp.
    """
        assert status in [None, Job.STATUS_SUCCESS,
                          Job.STATUS_FAILURE], 'Unknown status: %s' % status
        runs = []
        cluster = metric.JOBS_STATS_NAME
        # TODO(marc): there is no guarantee that jobs will run daily (could be more or less). It shouldn't matter except
        # when accessing the data using counters.
        group_key = metric.Metric.EncodeGroupKey(
            cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))
        start_time = (start_timestamp if start_timestamp is not None
                      else time.time() - constants.SECONDS_PER_WEEK)

        # Search for metrics from start_time to now.
        existing_metrics = yield gen.Task(metric.Metric.QueryTimespan,
                                          self._client, group_key, start_time,
                                          None)
        for m in existing_metrics:
            if m.machine_id != self._name:
                # Not for this job.
                continue

            # Parse and validate payload.
            payload = DotDict(json.loads(m.payload))
            assert 'start_time' in payload and 'status' in payload, 'Malformed payload: %r' % payload
            assert payload['start_time'] == m.timestamp, \
                'Payload start_time does not match metric timestamp'

            if status is not None and payload['status'] != status:
                continue

            runs.append(payload)

        # Sort by timestamp, although it should already be sorted.
        runs.sort(key=lambda payload: payload['start_time'])
        if limit is None:
            callback(runs)
        else:
            callback(runs[-limit:])
Example 12
def _Start(callback):
    """Grab a lock on job:analyze_dynamodb and call RunOnce. If we get a return value, write it to the job summary."""
    # Setup throttling.
    for table in vf_schema.SCHEMA.GetTables():
        table.read_units = max(
            1, table.read_units // options.options.throttling_factor)
        table.write_units = max(
            1, table.write_units // options.options.throttling_factor)

    client = db_client.DBClient.Instance()
    job = Job(client, 'analyze_dynamodb')

    if not options.options.dry_run and options.options.limit_users > 0:
        logging.error(
            '--limit_users specified, but not running in dry-run mode. Aborting'
        )
        callback()
        return

    if options.options.require_lock:
        got_lock = yield gen.Task(job.AcquireLock)
        if got_lock == False:
            logging.warning('Failed to acquire job lock: exiting.')
            callback()
            return

    result = None
    job.Start()
    try:
        result = yield gen.Task(RunOnce, client, job)
    except:
        # Failure: log run summary with trace.
        typ, val, tb = sys.exc_info()
        msg = ''.join(traceback.format_exception(typ, val, tb))
        logging.info('Registering failed run with message: %s' % msg)
        yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=msg)
    else:
        if result is not None and not options.options.dry_run:
            # Successful run with data processed and not in dry-run mode: write run summary.
            stats = DotDict()
            stats['last_day'] = result
            logging.info('Registering successful run with stats: %r' % stats)
            yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=stats)
    finally:
        yield gen.Task(job.ReleaseLock)

    callback()
Example 13
def RunOnce(client, callback):
  today = util.NowUTCToISO8601()
  logging.info('getting table sizes for %s' % today)

  results = yield gen.Task(vf_schema.SCHEMA.VerifyOrCreate, client, verify_only=True)
  stats = DotDict()
  for r in sorted(results):
    name = r[0]
    props = r[1]
    stats['db.table.count.%s' % name] = props.count
    stats['db.table.size.%s' % name] = props.size_bytes

  # Replace the entire 'db.table' prefix in previous metrics.
  hms = logs_util.kDailyMetricsTimeByLogType['dynamodb_stats']
  yield gen.Task(logs_util.UpdateMetrics, client, {today: stats}, prefix_to_erase='db.table',
                 dry_run=options.options.dry_run, hms_tuple=hms)
  callback()
Example 14
def _Start(callback):
    """Grab a lock on job:server_log_metrics and call RunOnce. If we get a return value, write it to the job summary."""
    if options.options.send_email:
        # When running on devbox, this prompts for the passphrase. Skip if not sending email.
        EmailManager.SetInstance(SendGridEmailManager())
    else:
        EmailManager.SetInstance(LoggingEmailManager())

    client = db_client.DBClient.Instance()
    job = Job(client, 'server_log_metrics')

    if options.options.require_lock:
        got_lock = yield gen.Task(job.AcquireLock)
        if got_lock == False:
            logging.warning('Failed to acquire job lock: exiting.')
            callback()
            return

    is_full_run = all([
        options.options.compute_user_requests,
        options.options.compute_registration_delay,
        options.options.compute_app_versions
    ])

    result = None
    job.Start()
    try:
        result = yield gen.Task(RunOnce, client, job)
    except:
        # Failure: log run summary with trace.
        typ, val, tb = sys.exc_info()
        msg = ''.join(traceback.format_exception(typ, val, tb))
        logging.info('Registering failed run with message: %s' % msg)
        yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=msg)
    else:
        if result is not None and not options.options.dry_run and is_full_run:
            # Successful full run with data processed and not in dry-run mode: write run summary.
            stats = DotDict()
            stats['last_day'] = result
            logging.info('Registering successful run with stats: %r' % stats)
            yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=stats)
    finally:
        yield gen.Task(job.ReleaseLock)

    callback()
Example 15
def _Start(callback):
    """Grab a lock on job:itunes_trends and call RunOnce. If we get a return value, write it to the job summary."""
    assert options.options.user is not None and options.options.vendor_id is not None
    apple_id = '*****@*****.**' % options.options.user
    # Attempt to lookup iTunes Connect password from secrets.
    password = secrets.GetSecret('itunes_connect_%s' % options.options.user)
    assert password

    client = db_client.DBClient.Instance()
    job = Job(client, 'itunes_trends')

    if options.options.require_lock:
        got_lock = yield gen.Task(job.AcquireLock)
        if got_lock == False:
            logging.warning('Failed to acquire job lock: exiting.')
            callback()
            return

    result = None
    job.Start()
    try:
        result = yield gen.Task(RunOnce, client, job, apple_id, password)
    except:
        # Failure: log run summary with trace.
        msg = traceback.format_exc()
        logging.info('Registering failed run with message: %s' % msg)
        yield gen.Task(job.RegisterRun, Job.STATUS_FAILURE, failure_msg=msg)
    else:
        if result is not None and not options.options.dry_run:
            # Successful run with data processed and not in dry-run mode: write run summary.
            stats = DotDict()
            stats['last_day'] = result
            logging.info('Registering successful run with stats: %r' % stats)
            yield gen.Task(job.RegisterRun, Job.STATUS_SUCCESS, stats=stats)
    finally:
        yield gen.Task(job.ReleaseLock)

    callback()
Example 16
def UpdateMetrics(db_client, day_stats, callback, dry_run=True, prefix_to_erase=None, hms_tuple=None):
  """Write 'day_stats' to the metrics table. First lookup any existing metrics and update them.
  'day_stats' is a dictionary of {day_in_iso8601: DotDict}.
  If 'dry_run' is True, don't commit the changes to the metrics table, but perform all the work and log to info.
  If 'prefix_to_erase' is not None, we first replace the passed-in prefix with an empty dotdict.
  If 'hms_tuple' is not None, the timestamp for the metric entry will be with the specified hour/minute/second,
  otherwise, we use noon. To help with consistency, hms_tuple should come from kDailyMetricsTimeByLogType above.

  For example, given the existing metric: { itunes: { downloads: { 'US': 5, 'UK': 3 }, update: { ... }}}
  We can either:
    - Replace the downloads numbers: (the entire tree under 'prefix_to_erase' gets replaced)
      UpdateMetrics({'2013-02-01': {'itunes': {'downloads': { 'DE': 3, 'FR': 1 }}}}, prefix_to_erase='itunes.downloads')
      resulting in: { itunes: { downloads: { 'DE': 3, 'FR': 1 }, update: { ... }}}
    - Or we can update with partial stats (no prefix_to_erase):
      UpdateMetrics({'2013-02-01': {'itunes': { 'downloads': { 'DE': 3, 'FR': 1 }}}})
      resulting in: { itunes: { downloads: { 'US': 5, 'UK': 3, 'DE': 3, 'FR': 1 }, update: { ... }}}
  """
  if len(day_stats) == 0:
    callback()
    return

  cluster = metric.LOGS_STATS_NAME
  group_key = metric.Metric.EncodeGroupKey(cluster, metric.Metric.FindIntervalForCluster(cluster, 'daily'))

  # Convert YYYY-MM-DD into a UTC timestamp at the requested time of day (noon by default).
  h, m, s = hms_tuple if hms_tuple is not None else (12, 0, 0)
  timestamps = [(util.ISO8601ToUTCTimestamp(day, hour=h, minute=m, second=s), day) for day in sorted(day_stats.keys())]

  # Query Metrics table for all metrics between the timestamps we have data for.
  existing_metrics = yield gen.Task(metric.Metric.QueryTimespan, db_client, group_key,
                                    timestamps[0][0], timestamps[-1][0])
  existing_dict = dict((m.timestamp, m) for m in existing_metrics)

  tasks = []
  for t, day in timestamps:
    data = day_stats[day]
    prev_metric = existing_dict.get(t, None)

    payload = json.dumps(data)
    if prev_metric is None:
      logging.info('%s: new metric: %r' % (day, payload))
    else:
      prev_payload = prev_metric.payload
      # We do this twice, it's simpler than making deepcopy work on DotDict.
      prev_data = DotDict(json.loads(prev_payload))
      new_data = DotDict(json.loads(prev_payload))
      if prefix_to_erase is not None:
        # We can't call 'del' on a DotDict's internals, so simply replace with an empty DotDict; we'll repopulate it.
        new_data[prefix_to_erase] = DotDict()

      # DotDict doesn't have an update() method.
      flat = new_data.flatten()
      flat.update(data.flatten())
      new_data = DotDict(flat)

      payload = json.dumps(new_data)
      if new_data.flatten() == prev_data.flatten():
        logging.info('%s: metric has not changed, skipping' % day)
        continue
      else:
        logging.info('%s: changed metric: %s -> %s' % (day, prev_payload, payload))

    if not dry_run:
      new_metric = metric.Metric.Create(group_key, 'logs_daily', t, payload)
      tasks.append(gen.Task(new_metric.Update, db_client))

  yield tasks
  callback()
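A hedged usage sketch for UpdateMetrics, combining the docstring's worked example with the gen.Task call style seen in Example 13; the coroutine name _WriteItunesStats and the surrounding client/callback plumbing are assumed for illustration:

@gen.engine
def _WriteItunesStats(client, callback):
  # Replace the entire 'itunes.downloads' subtree for 2013-02-01, per the docstring example above.
  day_stats = {'2013-02-01': DotDict({'itunes': {'downloads': {'DE': 3, 'FR': 1}}})}
  yield gen.Task(UpdateMetrics, client, day_stats,
                 dry_run=False, prefix_to_erase='itunes.downloads')
  callback()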
Example 17
    def testUpdateMetrics(self):
        def _DotDictsEqual(dict1, dict2):
            return dict1.flatten() == dict2.flatten()

        # Write some basic metrics.
        stats_1 = DotDict(
            {'itunes': {
                'downloads': {
                    'US': 1,
                    'UK': 2,
                    'FR': 3
                }
            }})
        stats_2 = DotDict({'itunes': {'downloads': {'US': 5, 'DE': 6}}})
        stats_3 = DotDict({'itunes': {'updates': {'US': 4, 'UK': 5, 'FR': 6}}})
        self._WriteMetric('2013-01-01', stats_1)
        self._WriteMetric('2013-01-02', stats_2)
        self._WriteMetric('2013-01-03', stats_3)

        # Dict of new stats.
        new_stats = {
            '2013-01-01': stats_1,  # No changes.
            '2013-01-02': stats_1,  # Changed.
            '2013-01-03': stats_2,  # Changed, but in a different prefix
            '2013-01-04': stats_2,  # New metric.
        }

        # Dry-run only.
        self._RunAsync(logs_util.UpdateMetrics,
                       self._client,
                       new_stats,
                       dry_run=True)
        self.assertTrue(_DotDictsEqual(stats_1, self._GetMetric('2013-01-01')))
        self.assertTrue(_DotDictsEqual(stats_2, self._GetMetric('2013-01-02')))
        self.assertTrue(_DotDictsEqual(stats_3, self._GetMetric('2013-01-03')))
        self.assertIsNone(self._GetMetric('2013-01-04'))

        self._RunAsync(logs_util.UpdateMetrics,
                       self._client,
                       new_stats,
                       dry_run=True,
                       prefix_to_erase='itunes')
        self.assertTrue(_DotDictsEqual(stats_1, self._GetMetric('2013-01-01')))
        self.assertTrue(_DotDictsEqual(stats_2, self._GetMetric('2013-01-02')))
        self.assertTrue(_DotDictsEqual(stats_3, self._GetMetric('2013-01-03')))
        self.assertIsNone(self._GetMetric('2013-01-04'))

        # Update only, don't erase previous metrics.
        self._RunAsync(logs_util.UpdateMetrics,
                       self._client,
                       new_stats,
                       dry_run=False)
        # stats1 doesn't change.
        self.assertTrue(_DotDictsEqual(stats_1, self._GetMetric('2013-01-01')))
        # stats2 gains UK and FR from stats1, keeps its own DE, and changes US.
        self.assertTrue(
            _DotDictsEqual(
                self._GetMetric('2013-01-02'),
                DotDict({
                    'itunes': {
                        'downloads': {
                            'US': 1,
                            'UK': 2,
                            'FR': 3,
                            'DE': 6
                        }
                    }
                })))
        # stats3 keeps its own data (different prefix) and gains stats2 under 'downloads'.
        self.assertTrue(
            _DotDictsEqual(
                self._GetMetric('2013-01-03'),
                DotDict({
                    'itunes': {
                        'downloads': {
                            'US': 5,
                            'DE': 6
                        },
                        'updates': {
                            'US': 4,
                            'UK': 5,
                            'FR': 6
                        }
                    }
                })))
        # stats4 is brand new.
        self.assertTrue(_DotDictsEqual(stats_2, self._GetMetric('2013-01-04')))

        # Rewrite metrics. 2013-01-04 will still be filled.
        self._WriteMetric('2013-01-01', stats_1)
        self._WriteMetric('2013-01-02', stats_2)
        self._WriteMetric('2013-01-03', stats_3)

        # Update and erase a given prefix on previous metrics.
        self._RunAsync(logs_util.UpdateMetrics,
                       self._client,
                       new_stats,
                       dry_run=False,
                       prefix_to_erase='itunes.downloads')
        # stats1 doesn't change.
        self.assertTrue(_DotDictsEqual(stats_1, self._GetMetric('2013-01-01')))
        # stats2 gains UK and FR from stats1, a new value for US, and drops DE.
        self.assertTrue(
            _DotDictsEqual(
                self._GetMetric('2013-01-02'),
                DotDict({'itunes': {
                    'downloads': {
                        'US': 1,
                        'UK': 2,
                        'FR': 3
                    }
                }})))
        # stats3 keeps its own data (different prefix) and gains stats2 under 'downloads'.
        self.assertTrue(
            _DotDictsEqual(
                self._GetMetric('2013-01-03'),
                DotDict({
                    'itunes': {
                        'downloads': {
                            'US': 5,
                            'DE': 6
                        },
                        'updates': {
                            'US': 4,
                            'UK': 5,
                            'FR': 6
                        }
                    }
                })))
        # stats4 is brand new.
        self.assertTrue(_DotDictsEqual(stats_2, self._GetMetric('2013-01-04')))

        # Now write metrics at a custom timestamp. By default, they are written at noon.
        self.assertIsNone(self._GetMetric('2013-01-01', h=12, m=1))

        new_stats2 = {
            '2013-01-01': stats_3,
            '2013-01-02': stats_2,
            '2013-01-03': stats_1,
            '2013-01-04': stats_1,
        }
        hms = logs_util.kDailyMetricsTimeByLogType['active_users']
        self._RunAsync(logs_util.UpdateMetrics,
                       self._client,
                       new_stats2,
                       dry_run=False,
                       hms_tuple=hms)
        self.assertTrue(
            _DotDictsEqual(self._GetMetric('2013-01-01', h=12, m=1), stats_3))
        self.assertTrue(
            _DotDictsEqual(self._GetMetric('2013-01-02', h=12, m=1), stats_2))
        self.assertTrue(
            _DotDictsEqual(self._GetMetric('2013-01-03', h=12, m=1), stats_1))
        self.assertTrue(
            _DotDictsEqual(self._GetMetric('2013-01-04', h=12, m=1), stats_1))

        # Make sure the stats previously written at noon haven't changed.
        self.assertTrue(_DotDictsEqual(stats_1, self._GetMetric('2013-01-01')))
        self.assertTrue(
            _DotDictsEqual(
                self._GetMetric('2013-01-02'),
                DotDict({'itunes': {
                    'downloads': {
                        'US': 1,
                        'UK': 2,
                        'FR': 3
                    }
                }})))
        self.assertTrue(
            _DotDictsEqual(
                self._GetMetric('2013-01-03'),
                DotDict({
                    'itunes': {
                        'downloads': {
                            'US': 5,
                            'DE': 6
                        },
                        'updates': {
                            'US': 4,
                            'UK': 5,
                            'FR': 6
                        }
                    }
                })))
        self.assertTrue(_DotDictsEqual(stats_2, self._GetMetric('2013-01-04')))
Example 18
def ProcessTables(client, callback):
    user_count = Counter()
    locale_count = Counter()

    identity_count = Counter()
    identity_types = Counter()

    device_highest_version = Counter()
    device_has_notification = Counter()
    device_notification_count = Counter()

    settings_email_alerts = Counter()
    settings_sms_alerts = Counter()
    settings_push_alerts = Counter()
    settings_storage = Counter()
    settings_marketing = Counter()

    start_key = None
    limit = options.options.limit_users if options.options.limit_users > 0 else None
    while True:
        users, start_key = yield gen.Task(User.Scan,
                                          client,
                                          None,
                                          limit=limit,
                                          excl_start_key=start_key)

        for user in users:
            if user.IsTerminated():
                # This includes terminated prospective users (pretty rare).
                user_count['terminated'] += 1
                continue
            elif not user.IsRegistered():
                user_count['prospective'] += 1
                continue

            # From here on out, only registered users are part of the stats.
            user_count['registered'] += 1

            # User locale.
            locale_count[user.locale or 'NONE'] += 1

            # Count of identities by type.
            counts, types = yield gen.Task(CountByIdentity, client,
                                           user.user_id)
            identity_count[counts] += 1
            identity_types[types] += 1

            # Versions and notification status for user's devices.
            highest_version, notification_count = yield gen.Task(
                ProcessUserDevices, client, user.user_id)
            device_highest_version[highest_version.replace('.', '_')
                                   if highest_version else 'None'] += 1
            device_notification_count[str(notification_count)] += 1
            if notification_count > 0:
                device_has_notification['true'] += 1
            else:
                device_has_notification['false'] += 1

            # Account settings.
            settings = yield gen.Task(AccountSettings.QueryByUser, client,
                                      user.user_id, None)
            settings_email_alerts[settings.email_alerts or 'NA'] += 1
            settings_sms_alerts[settings.sms_alerts or 'NA'] += 1
            settings_push_alerts[settings.push_alerts or 'NA'] += 1
            settings_storage[','.join(settings.storage_options)
                             if settings.storage_options else 'NA'] += 1
            settings_marketing[settings.marketing or 'NA'] += 1

        if limit is not None:
            limit -= len(users)
            if limit <= 0:
                break

        if start_key is None:
            break

    day_stats = DotDict()
    day_stats['dynamodb.user.state'] = user_count
    day_stats['dynamodb.user.locale'] = locale_count
    day_stats['dynamodb.user.identities'] = identity_count
    day_stats['dynamodb.user.identity_types'] = identity_types
    day_stats['dynamodb.user.device_highest_version'] = device_highest_version
    day_stats['dynamodb.user.device_has_notification'] = device_has_notification
    day_stats['dynamodb.user.devices_with_notification'] = device_notification_count
    day_stats['dynamodb.user.settings_email_alerts'] = settings_email_alerts
    day_stats['dynamodb.user.settings_sms_alerts'] = settings_sms_alerts
    day_stats['dynamodb.user.settings_push_alerts'] = settings_push_alerts
    day_stats['dynamodb.user.settings_storage'] = settings_storage
    day_stats['dynamodb.user.settings_marketing'] = settings_marketing

    callback(day_stats)
Example 19
def _SerializeMetrics(metrics):
  def _SkipMetric(name):
    for regex, allowed_groups in kFilteredMetrics:
      res = re.match(regex, name)
      if res is None:
        continue
      assert len(res.groups()) == 1
      if res.groups()[0] in allowed_groups:
        return False
      else:
        return True
    return False

  def _AggregateMetric(running_sum, metric_name):
    """Given a metric name, determine whether we sum it into a different metric name or not.
    Returns whether the original metric needs to be processed.
    """
    keep = True
    for regex, replacement in kSummedMetrics:
      res = regex.sub(replacement, metric_name)
      if res != metric_name:
        keep = False
        if not _SkipMetric(res):
          running_sum[res] += v
    return keep

  data = defaultdict(dict)
  prev_metrics = {}
  seen_vars = set()
  for m in metrics:
    running_sum = Counter()
    timestamp = m.timestamp
    payload = DotDict(json.loads(m.payload)).flatten()
    for k, v in payload.iteritems():
      keep_original = _AggregateMetric(running_sum, k)
      if keep_original and not _SkipMetric(k):
        running_sum[k] += v
    for k, v in running_sum.iteritems():
      d = data[k]
      if len(d) == 0:
        d['is_average'] = False
        d['cluster_total'] = list()
        d['cluster_rate'] = list()
        d['description'] = k
      d['cluster_total'].append((timestamp, v))
      if k in prev_metrics:
        _, prev_v = prev_metrics[k]
        # Since the metrics are written exactly once a day, no need to divide, just use the difference.
        diff = (v - prev_v)
      else:
        diff = v
      if k not in seen_vars:
        # Skip the first data point, we don't know what the previous value is.
        # We can't use prev_metrics since metrics with holes (eg: missing days) get removed.
        d['cluster_rate'].append((timestamp, None))
        seen_vars.add(k)
      else:
        d['cluster_rate'].append((timestamp, diff))
      prev_metrics[k] = (timestamp, v)
    # Look for metrics that haven't been set recently and insert None to break the graph.
    # Since we may have sets of metrics stored at various timestamps, we can't just do this at the next
    # timestamp. Instead, we break the metric if we haven't seen a data point in slightly over one day.
    for k, (t, v) in prev_metrics.items():
      if (timestamp - t) > (constants.SECONDS_PER_DAY + constants.SECONDS_PER_HOUR):
        # data[k] can't be empty since we've seen this key before.
        data[k]['cluster_total'].append((timestamp, None))
        data[k]['cluster_rate'].append((timestamp, -v))
        # Remove it so we don't send back lots of data for no reason.
        del prev_metrics[k]

  return data
Example 20
def _SerializeMetrics(metrics):
    def _SkipMetric(name):
        for regex, allowed_groups in kFilteredMetrics:
            res = re.match(regex, name)
            if res is None:
                continue
            assert len(res.groups()) == 1
            if res.groups()[0] in allowed_groups:
                return False
            else:
                return True
        return False

    def _AggregateMetric(running_sum, metric_name):
        """Given a metric name, determine whether we sum it into a different metric name or not.
    Returns whether the original metric needs to be processed.
    """
        keep = True
        for regex, replacement in kSummedMetrics:
            res = regex.sub(replacement, metric_name)
            if res != metric_name:
                keep = False
                if not _SkipMetric(res):
                    running_sum[res] += v
        return keep

    data = defaultdict(dict)
    prev_metrics = {}
    seen_vars = set()
    for m in metrics:
        running_sum = Counter()
        timestamp = m.timestamp
        payload = DotDict(json.loads(m.payload)).flatten()
        for k, v in payload.iteritems():
            keep_original = _AggregateMetric(running_sum, k)
            if keep_original and not _SkipMetric(k):
                running_sum[k] += v
        for k, v in running_sum.iteritems():
            d = data[k]
            if len(d) == 0:
                d['is_average'] = False
                d['cluster_total'] = list()
                d['cluster_rate'] = list()
                d['description'] = k
            d['cluster_total'].append((timestamp, v))
            if k in prev_metrics:
                _, prev_v = prev_metrics[k]
                # Since the metrics are written exactly once a day, no need to divide, just use the difference.
                diff = (v - prev_v)
            else:
                diff = v
            if k not in seen_vars:
                # Skip the first data point, we don't know what the previous value is.
                # We can't use prev_metrics since metrics with holes (eg: missing days) get removed.
                d['cluster_rate'].append((timestamp, None))
                seen_vars.add(k)
            else:
                d['cluster_rate'].append((timestamp, diff))
            prev_metrics[k] = (timestamp, v)
        # Look for metrics that haven't been set recently and insert None to break the graph.
        # Since we may have sets of metrics stored at various timestamps, we can't just do this at the next
        # timestamp. Instead, we break the metric if we haven't seen a data point in slightly over one day.
        for k, (t, v) in prev_metrics.items():
            if (timestamp - t) > (constants.SECONDS_PER_DAY +
                                  constants.SECONDS_PER_HOUR):
                # data[k] can't be empty since we've seen this key before.
                data[k]['cluster_total'].append((timestamp, None))
                data[k]['cluster_rate'].append((timestamp, -v))
                # Remove it so we don't send back lots of data for no reason.
                del prev_metrics[k]

    return data
Example 21
def ProcessFiles(merged_store, filenames, callback):
  """Fetch and process each file contained in 'filenames'."""

  @gen.engine
  def _ProcessOneFile(contents, day_stats):
    """Iterate over the contents of a processed file: one entry per line. Increment stats for specific entries."""
    buf = cStringIO.StringIO(contents)
    buf.seek(0)
    ui_events = Counter()
    while True:
      line = buf.readline()
      if not line:
        break
      parsed = json.loads(line)
      if not parsed:
        continue
      if 'version' not in parsed:
        continue
      # TODO(marc): lookup the user's device ID in dynamodb and get device model.
      payload = parsed['payload']
      if 'name' in payload:
        if payload['name'] == '/assets/scan' and payload['type'] == 'full':
          day_stats.AddScan(parsed['version'], payload['num_scanned'], payload['elapsed'])
        elif payload['name'].startswith('/ui/'):
          ui_events[payload['name']] += 1
    if ui_events:
      ui_events['/ui/anything'] += 1
    day_stats.AddEvents(ui_events)
    buf.close()

  today = util.NowUTCToISO8601()
  # Group filenames by day.
  files_by_day = defaultdict(list)
  for filename in filenames:
    _, day, user = filename.split('/')
    if options.options.compute_today or today != day:
      files_by_day[day].append(filename)

  # Compute per-day totals. Toss them into a list, we'll want it sorted.
  stats_by_day = {}
  for day in sorted(files_by_day.keys()):
    # We don't really need to process days in-order, but it's nicer.
    files = files_by_day[day]
    day_stats = DayStats(day)
    for f in files:
      contents = ''
      try:
        contents = yield gen.Task(merged_store.Get, f)
      except Exception as e:
        logging.error('Error fetching file %s: %r' % (f, e))
        continue
      _ProcessOneFile(contents, day_stats)
    if len(day_stats._long_scan_speeds) == 0:
      continue
    dd = DotDict()
    for p in [1, 5, 10, 25, 50, 75, 90, 95, 99]:
      dd['user_analytics.scans_gt1s_speed_percentile.%.2d' % p] = day_stats.LongScanSpeedPercentile(p)
      dd['user_analytics.scans_duration_percentile.%.2d' % p] = day_stats.ScanDurationPercentile(p)
      dd['user_analytics.scans_num_photos_percentile.%.2d' % p] = day_stats.PhotosScannedPercentile(p)
    dd['user_analytics.ui.event_users'] = day_stats.event_users
    dd['user_analytics.ui.total_events'] = day_stats.total_events
    stats_by_day[day] = dd

  callback(stats_by_day)
Example 22
    def testMetrics(self):
        """Test fetching/writing metrics."""
        # Job being tested.
        job = Job(self._client, 'test_job')
        prev_runs = self._RunAsync(job.FindPreviousRuns)
        self.assertEqual(len(prev_runs), 0)

        # Unrelated job with a different name. Run entries should not show up under 'test_job'.
        other_job = Job(self._client, 'other_test_job')
        other_job.Start()
        self._RunAsync(other_job.RegisterRun, Job.STATUS_SUCCESS)
        other_job.Start()
        self._RunAsync(other_job.RegisterRun, Job.STATUS_FAILURE)

        # Calling RegisterRun without first calling Start fails because the start_time is not set.
        self.assertIsNone(job._start_time)
        self.assertRaises(AssertionError, self._RunAsync, job.RegisterRun,
                          Job.STATUS_SUCCESS)

        job.Start()
        self.assertIsNotNone(job._start_time)
        # Overwrite it for easier testing.
        start_time = job._start_time = int(time.time() -
                                           (constants.SECONDS_PER_WEEK +
                                            constants.SECONDS_PER_HOUR))

        # Write run summary with extra stats.
        stats = DotDict()
        stats['foo.bar'] = 5
        stats['baz'] = 'test'
        self._RunAsync(job.RegisterRun,
                       Job.STATUS_SUCCESS,
                       stats=stats,
                       failure_msg='foo')
        # start_time is reset to prevent multiple calls to RegisterRun.
        self.assertIsNone(job._start_time)
        self.assertRaises(AssertionError, self._RunAsync, job.RegisterRun,
                          Job.STATUS_SUCCESS)

        end_time = int(time.time())
        # Default search is "runs started in the past week".
        prev_runs = self._RunAsync(job.FindPreviousRuns)
        self.assertEqual(len(prev_runs), 0)
        # With no status filter, runs of any status are returned (only the successful one exists so far).
        prev_runs = self._RunAsync(job.FindPreviousRuns,
                                   start_timestamp=(start_time - 10))
        self.assertEqual(len(prev_runs), 1)
        self.assertEqual(prev_runs[0]['start_time'], start_time)
        self.assertAlmostEqual(prev_runs[0]['end_time'], end_time, delta=10)
        self.assertEqual(prev_runs[0]['status'], Job.STATUS_SUCCESS)
        self.assertEqual(prev_runs[0]['stats.foo.bar'], 5)
        self.assertEqual(prev_runs[0]['stats.baz'], 'test')
        # failure_msg does nothing when status is SUCCESS.
        self.assertTrue('failure_msg' not in prev_runs[0])

        # Search for failed runs.
        prev_runs = self._RunAsync(job.FindPreviousRuns,
                                   start_timestamp=(start_time - 10),
                                   status=Job.STATUS_FAILURE)
        self.assertEqual(len(prev_runs), 0)

        # Create a failed job summary.
        job.Start()
        start_time2 = job._start_time = int(time.time() -
                                            constants.SECONDS_PER_HOUR)
        self._RunAsync(job.RegisterRun,
                       Job.STATUS_FAILURE,
                       failure_msg='stack trace')

        # Find previous runs using a variety of filters.
        prev_runs = self._RunAsync(job.FindPreviousRuns,
                                   start_timestamp=(start_time - 10),
                                   status=Job.STATUS_SUCCESS)
        self.assertEqual(len(prev_runs), 1)
        self.assertEqual(prev_runs[0]['start_time'], start_time)
        prev_runs = self._RunAsync(job.FindPreviousRuns,
                                   start_timestamp=(start_time - 10),
                                   status=Job.STATUS_FAILURE)
        self.assertEqual(len(prev_runs), 1)
        self.assertEqual(prev_runs[0]['status'], Job.STATUS_FAILURE)
        self.assertEqual(prev_runs[0]['failure_msg'], 'stack trace')
        self.assertEqual(prev_runs[0]['start_time'], start_time2)
        prev_runs = self._RunAsync(job.FindPreviousRuns,
                                   start_timestamp=(start_time - 10))
        self.assertEqual(len(prev_runs), 2)
        self.assertEqual(prev_runs[0]['start_time'], start_time)
        self.assertEqual(prev_runs[1]['start_time'], start_time2)
        prev_runs = self._RunAsync(job.FindPreviousRuns,
                                   start_timestamp=(start_time2 - 10))
        self.assertEqual(len(prev_runs), 1)
        self.assertEqual(prev_runs[0]['start_time'], start_time2)
        prev_runs = self._RunAsync(job.FindPreviousRuns,
                                   start_timestamp=(start_time - 10),
                                   limit=1)
        self.assertEqual(len(prev_runs), 1)
        self.assertEqual(prev_runs[0]['start_time'], start_time2)

        # Find last successful run with optional payload key/value.
        prev_success = self._RunAsync(job.FindLastSuccess,
                                      start_timestamp=(start_time - 10))
        self.assertIsNotNone(prev_success)
        self.assertEqual(prev_success['stats.foo.bar'], 5)
        prev_success = self._RunAsync(job.FindLastSuccess,
                                      start_timestamp=(start_time - 10),
                                      with_payload_key='stats.baz')
        self.assertIsNotNone(prev_success)
        self.assertEqual(prev_success['stats.foo.bar'], 5)
        prev_success = self._RunAsync(job.FindLastSuccess,
                                      start_timestamp=(start_time - 10),
                                      with_payload_key='stats.bar')
        self.assertIsNone(prev_success)
        prev_success = self._RunAsync(job.FindLastSuccess,
                                      start_timestamp=(start_time - 10),
                                      with_payload_key='stats.baz',
                                      with_payload_value='test')
        self.assertIsNotNone(prev_success)
        self.assertEqual(prev_success['stats.foo.bar'], 5)
        prev_success = self._RunAsync(job.FindLastSuccess,
                                      start_timestamp=(start_time - 10),
                                      with_payload_key='stats.baz',
                                      with_payload_value='test2')
        self.assertIsNone(prev_success)