Beispiel #1
0
  def _AddAlertsToDataStore(self):
    """Adds sample data, including triaged and non-triaged alerts."""
    urlsafe_key_by_rev = {}

    sheriff_key = sheriff.Sheriff(
        id='Chromium Perf Sheriff', email='*****@*****.**').put()
    testing_common.AddTests(['ChromiumGPU'], ['linux-release'], {
        'scrolling-benchmark': {
            'first_paint': {},
            'first_paint_ref': {},
            'mean_frame_time': {},
            'mean_frame_time_ref': {},
        }
    })
    first_paint = utils.TestKey(
        'ChromiumGPU/linux-release/scrolling-benchmark/first_paint')
    mean_frame_time = utils.TestKey(
        'ChromiumGPU/linux-release/scrolling-benchmark/mean_frame_time')

    # TestMetadata entities default to improvement_direction UNKNOWN, in which
    # case no anomaly counts as an improvement. Mark both tests as
    # "lower is better" so that the downward steps added below are counted as
    # improvements.
    for metadata_key in (first_paint, mean_frame_time):
      metadata = metadata_key.get()
      metadata.improvement_direction = anomaly.DOWN
      metadata.put()

    # Twelve non-triaged regressions, alternating between the two tests.
    for rev in range(10000, 10120, 10):
      picked_test = first_paint if rev % 20 == 0 else mean_frame_time
      ref_key = utils.TestKey('%s_ref' % utils.TestPath(picked_test))
      entity = anomaly.Anomaly(
          start_revision=rev - 5, end_revision=rev, test=picked_test,
          median_before_anomaly=100, median_after_anomaly=200,
          ref_test=ref_key, sheriff=sheriff_key)
      entity.SetIsImprovement()
      urlsafe_key_by_rev[rev] = entity.put().urlsafe()

    # Two already-triaged alerts: one marked invalid (-1), one with a bug.
    for rev in range(10120, 10140, 10):
      picked_test = first_paint if rev % 20 == 0 else mean_frame_time
      ref_key = utils.TestKey('%s_ref' % utils.TestPath(picked_test))
      bug_id = -1 if rev % 20 == 0 else 12345
      entity = anomaly.Anomaly(
          start_revision=rev - 5, end_revision=rev, test=picked_test,
          median_before_anomaly=100, median_after_anomaly=200,
          ref_test=ref_key, bug_id=bug_id, sheriff=sheriff_key)
      entity.SetIsImprovement()
      urlsafe_key_by_rev[rev] = entity.put().urlsafe()
      if bug_id > 0:
        bug_data.Bug(id=bug_id).put()

    # Six non-triaged improvements (medians step downward).
    for rev in range(10140, 10200, 10):
      ref_key = utils.TestKey('%s_ref' % utils.TestPath(mean_frame_time))
      entity = anomaly.Anomaly(
          start_revision=rev - 5, end_revision=rev, test=mean_frame_time,
          median_before_anomaly=200, median_after_anomaly=100,
          ref_test=ref_key, sheriff=sheriff_key)
      entity.SetIsImprovement()
      entity_key = entity.put()
      self.assertTrue(entity.is_improvement)
      urlsafe_key_by_rev[rev] = entity_key.urlsafe()

    return urlsafe_key_by_rev
Beispiel #2
0
 def _AddRows(self, keys):
     """Adds sample Row data for each of the given test keys."""
     for test_key in keys:
         test_path = utils.TestPath(test_key)
         testing_common.AddRows(test_path, [1, 2, 3, RECENT_REV])
Beispiel #3
0
 def testGetTestMetadataKey_TestMetadata(self):
   """A TestMetadata test key is returned unchanged by GetTestMetadataKey."""
   alert = anomaly.Anomaly(test=utils.TestKey('a/b/c/d'))
   metadata_key = alert.GetTestMetadataKey()
   self.assertEqual('TestMetadata', metadata_key.kind())
   self.assertEqual('a/b/c/d', metadata_key.id())
   self.assertEqual('a/b/c/d', utils.TestPath(metadata_key))
Beispiel #4
0
 def testTestPath_TestMetadata(self):
     """TestPath() returns the id of a TestMetadata key as the path."""
     metadata_key = ndb.Key('TestMetadata', 'm/b/suite/metric')
     self.assertEqual('m/b/suite/metric', utils.TestPath(metadata_key))
Beispiel #5
0
 def test_path(self):
     """Returns the slash-separated test path, 'master/bot/suite/chart/...'."""
     key = self.key
     return utils.TestPath(key)
def _IsRefBuild(test):
    """Returns True if the given test is a reference build.

    A test is a reference build if the LAST COMPONENT of its test path is
    exactly 'ref' or ends with '_ref' (e.g. 'M/b/suite/chart_ref').

    Args:
      test: An entity whose |key| can be converted to a test path by
          utils.TestPath.

    Returns:
      True if the test path's last component marks a reference build.
    """
    # utils.TestPath returns a slash-separated string; the original code
    # indexed test_path[-1], which is the last *character*, so the 'ref'
    # comparison could never match. Split into components first.
    last_part = utils.TestPath(test.key).split('/')[-1]
    return last_part == 'ref' or last_part.endswith('_ref')
 def _AddRows(self, keys):
     """Adds sample Row data for each of the given test keys."""
     for test_key in keys:
         test_path = utils.TestPath(test_key)
         testing_common.AddRows(test_path, [1, 2, 3, 455588])
Beispiel #8
0
class Anomaly(internal_only_model.InternalOnlyModel):
    """Represents a change-point or step found in the data series for a test.

  An Anomaly can be an upward or downward change, and can represent an
  improvement or a regression.
  """
    # Whether the alert should only be viewable by internal users.
    internal_only = ndb.BooleanProperty(indexed=True, default=False)

    # The time the alert fired.
    timestamp = ndb.DateTimeProperty(indexed=True, auto_now_add=True)

    # TODO(dberris): Remove these after migrating all issues to use the issues
    # repeated field, to allow an anomaly to be represented in multiple issues on
    # different Monorail projects.
    # === DEPRECATED START ===
    # Note: -1 denotes an invalid alert and -2 an ignored alert.
    # By default, this is None, which denotes a non-triaged alert.
    bug_id = ndb.IntegerProperty(indexed=True)

    # This is the project to which an anomaly is associated with, in the issue
    # tracker service.
    project_id = ndb.StringProperty(indexed=True, default='chromium')
    # === DEPRECATED END   ===

    # AlertGroups used for grouping.
    groups = ndb.KeyProperty(indexed=True, repeated=True)

    # This is the list of issues associated with the anomaly. We're doing this to
    # allow a single anomaly to be represented in multiple issues in different
    # issue trackers.
    issues = ndb.StructuredProperty(Issue, indexed=True, repeated=True)

    # This field aims to replace the 'bug_id' field serving as a state indicator.
    state = ndb.StringProperty(
        default='untriaged',
        choices=['untriaged', 'triaged', 'ignored', 'invalid'])

    # The subscribers who receive alerts.
    subscriptions = ndb.LocalStructuredProperty(Subscription, repeated=True)
    subscription_names = ndb.StringProperty(indexed=True, repeated=True)

    # Each Alert is related to one Test.
    test = ndb.KeyProperty(indexed=True)
    statistic = ndb.StringProperty(indexed=True)

    # We'd like to be able to query Alerts by Master, Bot, and Benchmark names.
    # These are derived from the slash-separated test path.
    master_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[0], indexed=True)
    bot_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[1], indexed=True)
    benchmark_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[2], indexed=True)

    # Each Alert has a revision range it's associated with; however,
    # start_revision and end_revision could be the same.
    start_revision = ndb.IntegerProperty(indexed=True)
    end_revision = ndb.IntegerProperty(indexed=True)

    # The revisions to use for display, if different than point id.
    display_start = ndb.IntegerProperty(indexed=False)
    display_end = ndb.IntegerProperty(indexed=False)

    # Ownership data, mapping e-mails to the benchmark's owners' emails and
    # component as the benchmark's Monorail component.
    ownership = ndb.JsonProperty()

    # Alert grouping is used to override the default alert group (test suite)
    # for auto-triage.
    alert_grouping = ndb.StringProperty(indexed=False, repeated=True)

    # The number of points before and after this anomaly that were looked at
    # when finding this anomaly.
    segment_size_before = ndb.IntegerProperty(indexed=False)
    segment_size_after = ndb.IntegerProperty(indexed=False)

    # The medians of the segments before and after the anomaly.
    median_before_anomaly = ndb.FloatProperty(indexed=False)
    median_after_anomaly = ndb.FloatProperty(indexed=False)

    # The standard deviation of the segments before the anomaly.
    std_dev_before_anomaly = ndb.FloatProperty(indexed=False)

    # The number of points that were used in the before/after segments.
    # This is also returned by FindAnomalies.
    window_end_revision = ndb.IntegerProperty(indexed=False)

    # In order to estimate how likely it is that this anomaly is due to noise,
    # t-test may be performed on the points before and after. The t-statistic,
    # degrees of freedom, and p-value are potentially-useful intermediary results.
    t_statistic = ndb.FloatProperty(indexed=False)
    degrees_of_freedom = ndb.FloatProperty(indexed=False)
    p_value = ndb.FloatProperty(indexed=False)

    # Whether this anomaly represents an improvement; if false, this anomaly is
    # considered to be a regression.
    is_improvement = ndb.BooleanProperty(indexed=True, default=False)

    # Whether this anomaly recovered (i.e. if this is a step down, whether there
    # is a corresponding step up later on, or vice versa.)
    recovered = ndb.BooleanProperty(indexed=True, default=False)

    # If the TestMetadata alerted upon has a ref build, store the ref build.
    ref_test = ndb.KeyProperty(indexed=False)

    # The corresponding units from the TestMetaData entity.
    units = ndb.StringProperty(indexed=False)

    recipe_bisects = ndb.KeyProperty(repeated=True, indexed=False)
    pinpoint_bisects = ndb.StringProperty(repeated=True, indexed=False)

    # Additional Metadata
    # ====
    #
    # Timestamps for the earliest and latest Row we used to determine whether this
    # is an anomaly. We use this to compute time-to-detection.
    earliest_input_timestamp = ndb.DateTimeProperty()
    latest_input_timestamp = ndb.DateTimeProperty()

    @property
    def percent_changed(self):
        """The percent change from before the anomaly to after."""
        # A change from a zero median would divide by zero; report the largest
        # representable float instead (displayed as FREAKIN_HUGE below).
        if self.median_before_anomaly == 0.0:
            return sys.float_info.max
        difference = self.median_after_anomaly - self.median_before_anomaly
        return 100 * difference / self.median_before_anomaly

    @property
    def absolute_delta(self):
        """The absolute change from before the anomaly to after."""
        return self.median_after_anomaly - self.median_before_anomaly

    @property
    def direction(self):
        """Whether the change is numerically an increase or decrease."""
        if self.median_before_anomaly < self.median_after_anomaly:
            return UP
        return DOWN

    def GetDisplayPercentChanged(self):
        """Gets a string showing the percent change."""
        if abs(self.percent_changed) == sys.float_info.max:
            return FREAKIN_HUGE
        else:
            return '%.1f%%' % abs(self.percent_changed)

    def GetDisplayAbsoluteChanged(self):
        """Gets a string showing the absolute change."""
        if abs(self.absolute_delta) == sys.float_info.max:
            return FREAKIN_HUGE
        else:
            return '%f' % abs(self.absolute_delta)

    def GetRefTestPath(self):
        """Returns the test path of the ref build, or None if there is none."""
        if not self.ref_test:
            return None
        return utils.TestPath(self.ref_test)

    def SetIsImprovement(self, test=None):
        """Sets whether the alert is an improvement for the given test."""
        if not test:
            test = self.GetTestMetadataKey().get()
        # |self.direction| is never equal to |UNKNOWN| (see the definition above)
        # so when the test improvement direction is |UNKNOWN|, |self.is_improvement|
        # will be False.
        self.is_improvement = (self.direction == test.improvement_direction)

    def GetTestMetadataKey(self):
        """Get the key for the TestMetadata entity of this alert.

    We are in the process of converting from Test entities to TestMetadata.
    Until this is done, it's possible that an alert may store either Test
    or TestMetadata in the 'test' KeyProperty. This gets the TestMetadata key
    regardless of what's stored.
    """
        return utils.TestMetadataKey(self.test)

    @classmethod
    @ndb.tasklet
    def QueryAsync(cls,
                   bot_name=None,
                   bug_id=None,
                   count_limit=0,
                   deadline_seconds=50,
                   inequality_property=None,
                   is_improvement=None,
                   key=None,
                   keys_only=False,
                   limit=100,
                   master_name=None,
                   max_end_revision=None,
                   max_start_revision=None,
                   max_timestamp=None,
                   min_end_revision=None,
                   min_start_revision=None,
                   min_timestamp=None,
                   recovered=None,
                   subscriptions=None,
                   start_cursor=None,
                   test=None,
                   test_keys=None,
                   test_suite_name=None,
                   project_id=None):
        """Asynchronously fetches a page of matching Anomaly entities.

        If |key| is given, returns just that one anomaly. Otherwise a query is
        built from the other keyword arguments. Because a Datastore query can
        hold at most one inequality filter, extra inequalities (and the
        project_id check) are applied in memory as "post filters"; cursors are
        chased until some results survive post-filtering or |deadline_seconds|
        has elapsed.

        Returns:
          (via ndb.Return) A tuple (results, start_cursor, count) where
          |start_cursor| is None when there are no further results.
        """
        if key:
            # This tasklet isn't allowed to catch the internal_only AssertionError.
            alert = yield ndb.Key(urlsafe=key).get_async()
            raise ndb.Return(([alert], None, 1))

        # post_filters can cause results to be empty, depending on the shape of the
        # data and which filters are applied in the query and which filters are
        # applied after the query. Automatically chase cursors until some results
        # are found, but stay under the request timeout.
        results = []
        deadline = time.time() + deadline_seconds
        while not results and time.time() < deadline:
            query = cls.query()
            equality_properties = []
            # NOTE(review): each equality filter below also forces
            # inequality_property to 'key' — presumably to match the composite
            # indexes available; see _InequalityFilters. Confirm against
            # index.yaml before changing.
            if subscriptions:  # Empty subscriptions is not allowed in query
                query = query.filter(cls.subscription_names.IN(subscriptions))
                equality_properties.append('subscription_names')
                inequality_property = 'key'
            if is_improvement is not None:
                query = query.filter(cls.is_improvement == is_improvement)
                equality_properties.append('is_improvement')
                inequality_property = 'key'
            if bug_id is not None:
                # '' means "untriaged" (bug_id is None); '*' means "any bug".
                if bug_id == '':
                    # ndb queries require '== None' (not 'is None').
                    query = query.filter(cls.bug_id == None)
                    equality_properties.append('bug_id')
                    inequality_property = 'key'
                elif bug_id != '*':
                    query = query.filter(cls.bug_id == int(bug_id))
                    equality_properties.append('bug_id')
                    inequality_property = 'key'
                # bug_id='*' translates to bug_id != None, which is handled with the
                # other inequality filters.
            if recovered is not None:
                query = query.filter(cls.recovered == recovered)
                equality_properties.append('recovered')
                inequality_property = 'key'
            if test or test_keys:
                if not test_keys:
                    test_keys = []
                if test:
                    # Match either the old-style Test key or the TestMetadata key.
                    test_keys += [
                        utils.OldStyleTestKey(test),
                        utils.TestMetadataKey(test)
                    ]
                query = query.filter(cls.test.IN(test_keys))
                query = query.order(cls.key)
                equality_properties.append('test')
                inequality_property = 'key'
            if master_name:
                query = query.filter(cls.master_name == master_name)
                equality_properties.append('master_name')
                inequality_property = 'key'
            if bot_name:
                query = query.filter(cls.bot_name == bot_name)
                equality_properties.append('bot_name')
                inequality_property = 'key'
            if test_suite_name:
                query = query.filter(cls.benchmark_name == test_suite_name)
                equality_properties.append('benchmark_name')
                inequality_property = 'key'

            query, post_filters = cls._InequalityFilters(
                query, equality_properties, inequality_property, bug_id,
                min_end_revision, max_end_revision, min_start_revision,
                max_start_revision, min_timestamp, max_timestamp)
            # Post filters need entity attributes, so a keys-only fetch would
            # not be enough.
            if post_filters:
                keys_only = False
            query = query.order(-cls.timestamp, cls.key)

            futures = [
                query.fetch_page_async(limit,
                                       start_cursor=start_cursor,
                                       keys_only=keys_only)
            ]
            if count_limit:
                futures.append(query.count_async(count_limit))
            query_duration = timing.WallTimeLogger('query_duration')
            with query_duration:
                yield futures
            results, start_cursor, more = futures[0].get_result()
            if count_limit:
                count = futures[1].get_result()
            else:
                count = len(results)
            logging.info('query_results_count=%d', len(results))
            if results:
                logging.info('duration_per_result=%f',
                             query_duration.seconds / len(results))
            if post_filters:
                # Drop results that fail any of the in-memory filters.
                results = [
                    alert for alert in results if all(
                        post_filter(alert) for post_filter in post_filters)
                ]
            # Temporary treat project_id as a postfilter. This is because some
            # chromium alerts have been booked with empty project_id.
            if project_id is not None:
                results = [
                    alert for alert in results
                    if alert.project_id == project_id
                    or alert.project_id == '' and project_id == 'chromium'
                ]
            if not more:
                start_cursor = None
            if not start_cursor:
                break
        raise ndb.Return((results, start_cursor, count))

    @classmethod
    def _InequalityFilters(cls, query, equality_properties,
                           inequality_property, bug_id, min_end_revision,
                           max_end_revision, min_start_revision,
                           max_start_revision, min_timestamp, max_timestamp):
        """Applies at most one inequality filter to |query|.

        Whichever inequality is chosen (via |inequality_property|, or computed
        automatically) is added to the Datastore query; all other requested
        inequalities are returned as callables to be applied in memory.

        Returns:
          A (query, post_filters) pair, where |post_filters| is a list of
          predicates taking an Anomaly and returning a bool.
        """
        # A query cannot have more than one inequality filter.
        # inequality_property allows users to decide which property to filter in the
        # query, which can significantly affect performance. If other inequalities
        # are specified, they will be handled by post_filters.

        # If callers set inequality_property without actually specifying a
        # corresponding inequality filter, then reset the inequality_property and
        # compute it automatically as if it were not specified.
        if inequality_property == 'start_revision':
            if min_start_revision is None and max_start_revision is None:
                inequality_property = None
        elif inequality_property == 'end_revision':
            if min_end_revision is None and max_end_revision is None:
                inequality_property = None
        elif inequality_property == 'timestamp':
            if min_timestamp is None and max_timestamp is None:
                inequality_property = None
        elif inequality_property == 'bug_id':
            if bug_id != '*':
                inequality_property = None
        elif inequality_property == 'key':
            if equality_properties == [
                    'subscription_names'
            ] and (min_start_revision or max_start_revision):
                # Use the composite index (subscription_names, start_revision,
                # -timestamp). See index.yaml.
                inequality_property = 'start_revision'
        else:
            inequality_property = None

        if inequality_property is None:
            # Compute a default inequality_property.
            # We prioritise the 'min' filters first because that lets us limit the
            # amount of data the Datastore instances might handle.
            if min_start_revision:
                inequality_property = 'start_revision'
            elif min_end_revision:
                inequality_property = 'end_revision'
            elif min_timestamp:
                inequality_property = 'timestamp'
            elif max_start_revision:
                inequality_property = 'start_revision'
            elif max_end_revision:
                inequality_property = 'end_revision'
            elif max_timestamp:
                inequality_property = 'timestamp'
            elif bug_id == '*':
                inequality_property = 'bug_id'

        post_filters = []
        if not inequality_property:
            # No inequality requested at all; nothing to add.
            return query, post_filters

        if not datastore_hooks.IsUnalteredQueryPermitted():
            # _DatastorePreHook will filter internal_only=False. index.yaml does not
            # specify indexes for `internal_only, $inequality_property, -timestamp`.
            # Use post_filters for all inequality properties.
            inequality_property = ''

        if bug_id == '*':
            if inequality_property == 'bug_id':
                logging.info('filter:bug_id!=None')
                # ndb queries require '!= None' (not 'is not None').
                query = query.filter(cls.bug_id != None).order(cls.bug_id)
            else:
                logging.info('post_filter:bug_id!=None')
                post_filters.append(lambda a: a.bug_id != None)

        # Apply the min filters before the max filters, because that lets us
        # optimise the query application for more recent data, reducing the amount
        # of data post-processing.
        if min_start_revision:
            min_start_revision = int(min_start_revision)
            if inequality_property == 'start_revision':
                logging.info('filter:min_start_revision=%d',
                             min_start_revision)
                query = query.filter(cls.start_revision >= min_start_revision)
                query = query.order(cls.start_revision)
            else:
                logging.info('post_filter:min_start_revision=%d',
                             min_start_revision)
                post_filters.append(
                    lambda a: a.start_revision >= min_start_revision)

        if min_end_revision:
            min_end_revision = int(min_end_revision)
            if inequality_property == 'end_revision':
                logging.info('filter:min_end_revision=%d', min_end_revision)
                query = query.filter(cls.end_revision >= min_end_revision)
                query = query.order(cls.end_revision)
            else:
                logging.info('post_filter:min_end_revision=%d',
                             min_end_revision)
                post_filters.append(
                    lambda a: a.end_revision >= min_end_revision)

        if min_timestamp:
            if inequality_property == 'timestamp':
                logging.info('filter:min_timestamp=%d',
                             time.mktime(min_timestamp.utctimetuple()))
                query = query.filter(cls.timestamp >= min_timestamp)
            else:
                logging.info('post_filter:min_timestamp=%d',
                             time.mktime(min_timestamp.utctimetuple()))
                post_filters.append(lambda a: a.timestamp >= min_timestamp)

        if max_start_revision:
            max_start_revision = int(max_start_revision)
            if inequality_property == 'start_revision':
                logging.info('filter:max_start_revision=%d',
                             max_start_revision)
                query = query.filter(cls.start_revision <= max_start_revision)
                query = query.order(-cls.start_revision)
            else:
                logging.info('post_filter:max_start_revision=%d',
                             max_start_revision)
                post_filters.append(
                    lambda a: a.start_revision <= max_start_revision)

        if max_end_revision:
            max_end_revision = int(max_end_revision)
            if inequality_property == 'end_revision':
                logging.info('filter:max_end_revision=%d', max_end_revision)
                query = query.filter(cls.end_revision <= max_end_revision)
                query = query.order(-cls.end_revision)
            else:
                logging.info('post_filter:max_end_revision=%d',
                             max_end_revision)
                post_filters.append(
                    lambda a: a.end_revision <= max_end_revision)

        if max_timestamp:
            if inequality_property == 'timestamp':
                logging.info('filter:max_timestamp=%d',
                             time.mktime(max_timestamp.utctimetuple()))
                query = query.filter(cls.timestamp <= max_timestamp)
            else:
                logging.info('post_filter:max_timestamp=%d',
                             time.mktime(max_timestamp.utctimetuple()))
                post_filters.append(lambda a: a.timestamp <= max_timestamp)

        return query, post_filters
Beispiel #9
0
    def _FillBenchmarkDetailsToHealthReport(self, benchmark_name, report_name,
                                            num_days, master):
        """Populates health data for one benchmark and marks it complete.

        Fills the BenchmarkHealthData entity for |benchmark_name| under the
        report |report_name| with: per-bot benchmark durations, recent
        non-improvement anomalies from the last |num_days| days, and bug /
        review / bisect details for each referenced bug, then saves it.
        """
        benchmark = ndb.Key('BenchmarkHealthReport', report_name,
                            'BenchmarkHealthData', benchmark_name).get()
        # Nothing to fill in if the entity doesn't exist.
        if not benchmark:
            return

        # Kick off one async query per matching test to get the most recent
        # BenchmarkDuration row (highest revision) for each bot.
        durations_pattern = '%s/*/%s/BenchmarkDuration' % (master,
                                                           benchmark_name)
        test_paths = list_tests.GetTestsMatchingPattern(durations_pattern)
        futures = set()
        for test_path in test_paths:
            key = utils.OldStyleTestKey(test_path)
            query = graph_data.Row.query(graph_data.Row.parent_test == key)
            query = query.order(-graph_data.Row.revision)
            futures.add(query.get_async())
        # Consume the futures as they complete, in any order.
        while futures:
            f = ndb.Future.wait_any(futures)
            futures.remove(f)
            row = f.get_result()
            if not row:
                continue
            # The bot name is the second component of the test path.
            bot = utils.TestPath(row.parent_test).split('/')[1]
            benchmark.bots.append(
                benchmark_health_data.BotHealthData(name=bot,
                                                    duration=row.value,
                                                    last_update=row.timestamp))

        # Collect regressions (not improvements) from the last |num_days| days.
        bug_ids = set()
        query = anomaly.Anomaly.query(
            anomaly.Anomaly.benchmark_name == benchmark_name,
            anomaly.Anomaly.master_name == master,
            anomaly.Anomaly.is_improvement == False, anomaly.Anomaly.timestamp
            > datetime.datetime.now() - datetime.timedelta(days=int(num_days)))
        query = query.order(-anomaly.Anomaly.timestamp)
        anomalies = query.fetch()
        for alert in anomalies:
            bug_id = alert.bug_id
            # Only positive ids refer to real bugs (negative values mark
            # invalid/ignored alerts, None means untriaged).
            if bug_id and bug_id > 0:
                bug_ids.add(bug_id)
            benchmark.alerts.append(
                benchmark_health_data.AlertHealthData(
                    bug_id=bug_id,
                    test_path=utils.TestPath(alert.GetTestMetadataKey()),
                    percent_changed=alert.GetDisplayPercentChanged(),
                    absolute_delta=alert.GetDisplayAbsoluteChanged()))

        # Fetch issue-tracker details for each referenced bug and record its
        # comments, reviews, and bisect jobs.
        for bug_id in bug_ids:
            details = bug_details.GetBugDetails(bug_id,
                                                utils.ServiceAccountHttp())
            benchmark.bugs.append(
                benchmark_health_data.BugHealthData(
                    bug_id=bug_id,
                    num_comments=len(details['comments']),
                    published=details['published'],
                    state=details['state'],
                    status=details['status'],
                    summary=details['summary']))
            for review in details['review_urls']:
                benchmark.reviews.append(
                    benchmark_health_data.ReviewData(review_url=review,
                                                     bug_id=bug_id))
            for bisect in details['bisects']:
                benchmark.bisects.append(
                    benchmark_health_data.BisectHealthData(
                        bug_id=bug_id,
                        buildbucket_link=bisect['buildbucket_link'],
                        metric=bisect['metric'],
                        status=bisect['status'],
                        bot=bisect['bot']))
        benchmark.is_complete = True
        benchmark.put()
Beispiel #10
0
 def testGet_WithBogusParameterNames_ParameterIgnored(self, simulate_mock):
     """An unrecognized config parameter is dropped before simulation."""
     test_key = self._AddSampleData()
     url = '/debug_alert?test_path=%s&config=%s' % (
         utils.TestPath(test_key), '{"foo":0.75}')
     response = self.testapp.get(url)
     simulate_mock.assert_called_once_with(mock.ANY)
     self.assertNotIn('"foo"', response.body)
Beispiel #11
0
 def testGet_WithValidTestPath_ShowsChart(self):
     """A valid test path renders the page containing the chart element."""
     sample_key = self._AddSampleData()
     response = self.testapp.get(
         '/debug_alert?test_path=%s' % utils.TestPath(sample_key))
     self.assertIn('id="plot"', response.body)
Beispiel #12
0
class Alert(internal_only_model.InternalOnlyModel):
  """General base class for alerts."""

  # Whether the alert should only be viewable by internal users.
  internal_only = ndb.BooleanProperty(indexed=True, default=False)

  # The time the alert fired.
  timestamp = ndb.DateTimeProperty(indexed=True, auto_now_add=True)

  # Note: -1 denotes an invalid alert and -2 an ignored alert.
  # By default, this is None, which denotes a non-triaged alert.
  bug_id = ndb.IntegerProperty(indexed=True)

  # The sheriff rotation that should handle this alert.
  sheriff = ndb.KeyProperty(kind=sheriff_module.Sheriff, indexed=True)

  # Each Alert is related to one Test.
  # May hold either an old-style Test key or a TestMetadata key; see
  # GetTestMetadataKey below.
  test = ndb.KeyProperty(indexed=True)

  # We'd like to be able to query Alerts by Master, Bot, and Benchmark names.
  # These are derived from the first three components of the test path
  # (master/bot/benchmark/...).
  master_name = ndb.ComputedProperty(
      lambda self: utils.TestPath(self.test).split('/')[0],
      indexed=True)
  bot_name = ndb.ComputedProperty(
      lambda self: utils.TestPath(self.test).split('/')[1],
      indexed=True)
  benchmark_name = ndb.ComputedProperty(
      lambda self: utils.TestPath(self.test).split('/')[2],
      indexed=True)

  # Each Alert has a revision range it's associated with; however,
  # start_revision and end_revision could be the same.
  start_revision = ndb.IntegerProperty(indexed=True)
  end_revision = ndb.IntegerProperty(indexed=True)

  # The group this alert belongs to.
  # TODO(qyearsley): If the old AnomalyGroup entities can be removed and
  # all recent groups have the kind AlertGroup, then the optional argument
  # kind=alert_group.AlertGroup can be added.
  group = ndb.KeyProperty(indexed=True)

  # The revisions to use for display, if different than point id.
  display_start = ndb.IntegerProperty(indexed=False)
  display_end = ndb.IntegerProperty(indexed=False)

  # Ownership data, mapping e-mails to the benchmark's owners' emails and
  # component as the benchmark's Monorail component
  ownership = ndb.JsonProperty()

  def GetTestMetadataKey(self):
    """Get the key for the TestMetadata entity of this alert.

    We are in the process of converting from Test entities to TestMetadata.
    Until this is done, it's possible that an alert may store either Test
    or TestMetadata in the 'test' KeyProperty. This gets the TestMetadata key
    regardless of what's stored.
    """
    return utils.TestMetadataKey(self.test)

  @classmethod
  def GetAlertsForTest(cls, test_key, limit=None):
    """Fetches alerts for a test, matching both old- and new-style keys.

    Args:
      test_key: The key of the test to fetch alerts for.
      limit: Maximum number of alerts to fetch, or None for no limit.

    Returns:
      A list of alerts whose 'test' property matches either the TestMetadata
      key or the old-style Test key form of test_key.
    """
    return cls.query(cls.test.IN([
        utils.TestMetadataKey(test_key),
        utils.OldStyleTestKey(test_key)])).fetch(limit=limit)
    def post(self):
        """Returns JSON data about a data stoppage for debugging dialog.

        Request parameters:
          key: Key of the data stoppage alert to debug; test_path and rev are
              ignored if key is specified.
          test_path: Test path of the test to debug; ignored if key is
              specified.
          rev: Point id of the last known revision; ignored if key is
              specified.

        Outputs:
          JSON which gives as many debugging details as possible.
        """
        stoppage_details = {}
        row = None
        if self.request.get('key'):
            alert = ndb.Key(urlsafe=self.request.get('key')).get()
            if not alert:
                self.response.out.write(
                    json.dumps({'error': 'Invalid alert key'}))
                return
            row = alert.row.get()
        else:
            # Grab row from test_path and rev.
            rev = self.request.get('rev')
            try:
                rev = int(rev)
            # int() raises ValueError for non-numeric strings (including the
            # empty string returned when 'rev' is absent) and TypeError for
            # non-string inputs. Previously only TypeError was caught, so a
            # missing or malformed 'rev' crashed instead of returning the
            # JSON error below.
            except (TypeError, ValueError):
                self.response.out.write(json.dumps({'error': 'Invalid rev'}))
                return
            test_path = self.request.get('test_path')
            if not test_path:
                self.response.out.write(
                    json.dumps({'error': 'No test specified'}))
                return

            # Rows are keyed by point id under a TestContainer parent.
            row = graph_data.Row.get_by_id(rev,
                                           parent=ndb.Key(
                                               'TestContainer', test_path))

        if not row:
            self.response.out.write(json.dumps({'error': 'No row for alert.'}))
            return

        test_path = utils.TestPath(row.key.parent())
        stoppage_details['test_path'] = test_path

        current_stdio_link = utils.GetStdioLinkFromRow(row)

        if not current_stdio_link:
            self.response.out.write(
                json.dumps({'error': 'Cannot find stdio link.'}))
            return
        _, master, bot, current_buildnumber, step = (
            utils.GetBuildDetailsFromStdioLink(current_stdio_link))
        if not master or not current_buildnumber:
            self.response.out.write(
                json.dumps({'error': 'Cannot parse stdio link.'}))
            return
        # Guess the link for the build after the stoppage by bumping the
        # build number embedded in the current link.
        next_buildnumber = str(int(current_buildnumber) + 1)
        next_stdio_link = current_stdio_link.replace(current_buildnumber,
                                                     next_buildnumber)

        stoppage_details['current_logdog_uri'] = (
            utils.GetLogdogLogUriFromStdioLink(current_stdio_link))
        stoppage_details['current_buildbot_status_page'] = (
            utils.GetBuildbotStatusPageUriFromStdioLink(current_stdio_link))
        stoppage_details['next_logdog_uri'] = (
            utils.GetLogdogLogUriFromStdioLink(next_stdio_link))
        stoppage_details['next_buildbot_status_page'] = (
            utils.GetBuildbotStatusPageUriFromStdioLink(next_stdio_link))

        current_build_info = milo_service.GetBuildbotBuildInfo(
            master, bot, current_buildnumber)
        # Fall back to the row's point id if build info is unavailable.
        stoppage_details['current_commit_pos'] = row.key.id()
        if current_build_info:
            # Extract the commit position number from a string like
            # 'refs/heads/master@{#123456}'.
            commit_pos_str = current_build_info['properties'][
                'got_revision_cp']
            stoppage_details['current_commit_pos'] = re.match(
                r'.*\{#(\d+)\}', commit_pos_str).group(1)
            current_result = current_build_info['steps'].get(step)
            if current_result:
                current_result = current_result.get('results')
            stoppage_details['current_result'] = current_result

        next_build_info = milo_service.GetBuildbotBuildInfo(
            master, bot, next_buildnumber)
        stoppage_details['next_commit_pos'] = None
        stoppage_details['next_result'] = None
        if next_build_info:
            commit_pos_str = next_build_info['properties']['got_revision_cp']
            stoppage_details['next_commit_pos'] = re.match(
                r'.*\{#(\d+)\}', commit_pos_str).group(1)
            next_result = next_build_info['steps'].get(step)
            if next_result:
                next_result = next_result.get('results')
            stoppage_details['next_result'] = next_result

        self.response.out.write(json.dumps(stoppage_details))
Beispiel #14
0
def _PrefillInfo(test_path):
  """Pre-fills some best guesses config form based on the test path.

  Args:
    test_path: Test path string.

  Returns:
    A dictionary indicating the result. If successful, this should contain
    the fields "suite", "email", "all_metrics", and "default_metric". If not
    successful this will contain the field "error".
  """
  if not test_path:
    return {'error': 'No test specified'}

  # The first three test path components identify the suite-level test.
  suite_path = '/'.join(test_path.split('/')[:3])
  suite = utils.TestKey(suite_path).get()
  if not suite:
    return {'error': 'Invalid test %s' % test_path}

  # One path component deeper than the suite.
  graph_path = '/'.join(test_path.split('/')[:4])
  graph_key = utils.TestKey(graph_path)

  info = {'suite': suite.test_name}
  info['master'] = suite.master_name
  info['internal_only'] = suite.internal_only
  info['use_archive'] = _CanDownloadBuilds(suite.master_name)

  info['all_bots'] = _GetAvailableBisectBots(suite.master_name)
  info['bisect_bot'] = GuessBisectBot(suite.master_name, suite.bot_name)

  user = users.get_current_user()
  if not user:
    return {'error': 'User not logged in.'}

  # Secondary check for bisecting internal only tests.
  if suite.internal_only and not utils.IsInternalUser():
    return {'error': 'Unauthorized access, please use corp account to login.'}

  info['email'] = user.email()

  info['all_metrics'] = []
  metric_keys = list_tests.GetTestDescendants(graph_key, has_rows=True)

  should_add_story_filter = (
      suite.test_name not in _NON_TELEMETRY_TEST_COMMANDS and
      # is not a top-level test_path, those are usually not story names
      '/' in test_path)
  test_path_prefix = test_path + '/'

  for metric_key in metric_keys:
    metric_path = utils.TestPath(metric_key)
    # Skip reference-build series; they are not bisectable metrics.
    if metric_path.endswith('/ref') or metric_path.endswith('_ref'):
      continue
    if metric_path.startswith(test_path_prefix):
      should_add_story_filter = False  # Stories do not have sub-tests.
    info['all_metrics'].append(GuessMetric(metric_path))
  info['default_metric'] = GuessMetric(test_path)

  if should_add_story_filter:
    _, story_name = test_path.rsplit('/', 1)
    if story_name.startswith('after_'):
      # TODO(perezju,#1811): Remove this hack after deprecating the
      # memory.top_10_mobile benchmark.
      story_name = story_name[len('after_'):]
    # During import, some chars in story names got replaced by "_" so they
    # could be safely included in the test_path. At this point we don't know
    # what the original characters were, so we pass a regex where each
    # underscore is replaced back with a match-any-character dot.
    # NOTE(review): re.escape stopped escaping '_' in Python 3.7, so the
    # r'\\_' pattern only matches on Python 2 / <3.7 — confirm the runtime
    # version this is deployed on.
    info['story_filter'] = re.sub(r'\\_', '.', re.escape(story_name))
  else:
    info['story_filter'] = ''

  return info
Beispiel #15
0
def _SubTestPath(test_key):
  """Returns the test path components that come after master/bot/suite."""
  parts = utils.TestPath(test_key).split('/')
  # A sub-test path only exists below the three suite-level components.
  assert len(parts) > 3
  return parts[3:]
Beispiel #16
0
class Anomaly(internal_only_model.InternalOnlyModel):
    """Represents a change-point or step found in the data series for a test.

  An Anomaly can be an upward or downward change, and can represent an
  improvement or a regression.
  """
    # Whether the alert should only be viewable by internal users.
    internal_only = ndb.BooleanProperty(indexed=True, default=False)

    # The time the alert fired.
    timestamp = ndb.DateTimeProperty(indexed=True, auto_now_add=True)

    # Note: -1 denotes an invalid alert and -2 an ignored alert.
    # By default, this is None, which denotes a non-triaged alert.
    bug_id = ndb.IntegerProperty(indexed=True)

    # The sheriff rotation that should handle this alert.
    sheriff = ndb.KeyProperty(kind=sheriff_module.Sheriff, indexed=True)

    # Each Alert is related to one Test.
    test = ndb.KeyProperty(indexed=True)
    # Presumably the name of the statistic this anomaly applies to —
    # TODO confirm with the code that writes this field.
    statistic = ndb.StringProperty(indexed=True)

    # We'd like to be able to query Alerts by Master, Bot, and Benchmark names.
    # Derived from the first three test path components (master/bot/benchmark).
    master_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[0], indexed=True)
    bot_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[1], indexed=True)
    benchmark_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[2], indexed=True)

    # Each Alert has a revision range it's associated with; however,
    # start_revision and end_revision could be the same.
    start_revision = ndb.IntegerProperty(indexed=True)
    end_revision = ndb.IntegerProperty(indexed=True)

    # The revisions to use for display, if different than point id.
    display_start = ndb.IntegerProperty(indexed=False)
    display_end = ndb.IntegerProperty(indexed=False)

    # Ownership data, mapping e-mails to the benchmark's owners' emails and
    # component as the benchmark's Monorail component
    ownership = ndb.JsonProperty()

    # The number of points before and after this anomaly that were looked at
    # when finding this anomaly.
    segment_size_before = ndb.IntegerProperty(indexed=False)
    segment_size_after = ndb.IntegerProperty(indexed=False)

    # The medians of the segments before and after the anomaly.
    median_before_anomaly = ndb.FloatProperty(indexed=False)
    median_after_anomaly = ndb.FloatProperty(indexed=False)

    # The standard deviation of the segments before the anomaly.
    std_dev_before_anomaly = ndb.FloatProperty(indexed=False)

    # The number of points that were used in the before/after segments.
    # This is also returned by FindAnomalies.
    window_end_revision = ndb.IntegerProperty(indexed=False)

    # In order to estimate how likely it is that this anomaly is due to noise,
    # t-test may be performed on the points before and after. The t-statistic,
    # degrees of freedom, and p-value are potentially-useful intermediary results.
    t_statistic = ndb.FloatProperty(indexed=False)
    degrees_of_freedom = ndb.FloatProperty(indexed=False)
    p_value = ndb.FloatProperty(indexed=False)

    # Whether this anomaly represents an improvement; if false, this anomaly is
    # considered to be a regression.
    is_improvement = ndb.BooleanProperty(indexed=True, default=False)

    # Whether this anomaly recovered (i.e. if this is a step down, whether there
    # is a corresponding step up later on, or vice versa.)
    recovered = ndb.BooleanProperty(indexed=True, default=False)

    # If the TestMetadata alerted upon has a ref build, store the ref build.
    ref_test = ndb.KeyProperty(indexed=False)

    # The corresponding units from the TestMetaData entity.
    units = ndb.StringProperty(indexed=False)

    # Keys / ids of bisect jobs associated with this anomaly.
    recipe_bisects = ndb.KeyProperty(repeated=True, indexed=False)
    pinpoint_bisects = ndb.StringProperty(repeated=True, indexed=False)

    @property
    def percent_changed(self):
        """The percent change from before the anomaly to after."""
        # Avoid division by zero; treat the change as effectively infinite.
        if self.median_before_anomaly == 0.0:
            return sys.float_info.max
        difference = self.median_after_anomaly - self.median_before_anomaly
        return 100 * difference / self.median_before_anomaly

    @property
    def absolute_delta(self):
        """The absolute change from before the anomaly to after."""
        return self.median_after_anomaly - self.median_before_anomaly

    @property
    def direction(self):
        """Whether the change is numerically an increase or decrease."""
        if self.median_before_anomaly < self.median_after_anomaly:
            return UP
        return DOWN

    def GetDisplayPercentChanged(self):
        """Gets a string showing the percent change."""
        # float_info.max is the sentinel for a division-by-zero change above.
        if abs(self.percent_changed) == sys.float_info.max:
            return FREAKIN_HUGE
        else:
            return '%.1f%%' % abs(self.percent_changed)

    def GetDisplayAbsoluteChanged(self):
        """Gets a string showing the absolute change."""
        if abs(self.absolute_delta) == sys.float_info.max:
            return FREAKIN_HUGE
        else:
            return '%f' % abs(self.absolute_delta)

    def GetRefTestPath(self):
        """Returns the test path of the ref build, or None if there is none."""
        if not self.ref_test:
            return None
        return utils.TestPath(self.ref_test)

    def SetIsImprovement(self, test=None):
        """Sets whether the alert is an improvement for the given test."""
        if not test:
            test = self.GetTestMetadataKey().get()
        # |self.direction| is never equal to |UNKNOWN| (see the definition above)
        # so when the test improvement direction is |UNKNOWN|, |self.is_improvement|
        # will be False.
        self.is_improvement = (self.direction == test.improvement_direction)

    def GetTestMetadataKey(self):
        """Get the key for the TestMetadata entity of this alert.

    We are in the process of converting from Test entities to TestMetadata.
    Until this is done, it's possible that an alert may store either Test
    or TestMetadata in the 'test' KeyProperty. This gets the TestMetadata key
    regardless of what's stored.
    """
        return utils.TestMetadataKey(self.test)

    @classmethod
    @ndb.tasklet
    def QueryAsync(cls,
                   bot_name=None,
                   bug_id=None,
                   count_limit=0,
                   deadline_seconds=50,
                   inequality_property=None,
                   is_improvement=None,
                   key=None,
                   keys_only=False,
                   limit=100,
                   master_name=None,
                   max_end_revision=None,
                   max_start_revision=None,
                   max_timestamp=None,
                   min_end_revision=None,
                   min_start_revision=None,
                   min_timestamp=None,
                   recovered=None,
                   sheriff=None,
                   start_cursor=None,
                   test=None,
                   test_keys=None,
                   test_suite_name=None):
        """Fetches a page of Anomaly entities matching the given filters.

        If 'key' is given, all other filters are ignored and the single
        entity is fetched by its urlsafe key. At most one inequality filter
        is applied in the datastore query; the rest are applied in memory
        (see _InequalityFilters).

        Returns (via ndb.Return):
          A tuple (results, cursor, count), where cursor is None when there
          are no more results, and count is the total match count (up to
          count_limit) when count_limit is set, else len(results).
        """
        if key:
            # This tasklet isn't allowed to catch the internal_only AssertionError.
            alert = yield ndb.Key(urlsafe=key).get_async()
            raise ndb.Return(([alert], None, 1))

        # post_filters can cause results to be empty, depending on the shape of the
        # data and which filters are applied in the query and which filters are
        # applied after the query. Automatically chase cursors until some results
        # are found, but stay under the request timeout.
        results = []
        deadline = time.time() + deadline_seconds
        while not results and time.time() < deadline:
            query = cls.query()
            if sheriff is not None:
                sheriff_key = ndb.Key('Sheriff', sheriff)
                sheriff_entity = yield sheriff_key.get_async()
                if sheriff_entity:
                    query = query.filter(cls.sheriff == sheriff_key)
            if is_improvement is not None:
                query = query.filter(cls.is_improvement == is_improvement)
            if bug_id is not None:
                # Empty string means "filter to non-triaged alerts" (bug_id None).
                if bug_id == '':
                    bug_id = None
                else:
                    bug_id = int(bug_id)
                query = query.filter(cls.bug_id == bug_id)
            if recovered is not None:
                query = query.filter(cls.recovered == recovered)
            if test or test_keys:
                if not test_keys:
                    test_keys = []
                if test:
                    # Match both old-style Test keys and TestMetadata keys.
                    test_keys += [
                        utils.OldStyleTestKey(test),
                        utils.TestMetadataKey(test)
                    ]
                query = query.filter(cls.test.IN(test_keys))
                query = query.order(cls.key)
                inequality_property = 'key'
            if master_name:
                query = query.filter(cls.master_name == master_name)
            if bot_name:
                query = query.filter(cls.bot_name == bot_name)
            if test_suite_name:
                query = query.filter(cls.benchmark_name == test_suite_name)

            query, post_filters = cls._InequalityFilters(
                query, inequality_property, min_end_revision, max_end_revision,
                min_start_revision, max_start_revision, min_timestamp,
                max_timestamp)
            if post_filters:
                # Post-filters need full entities to evaluate.
                keys_only = False
            query = query.order(-cls.timestamp)

            futures = [
                query.fetch_page_async(limit,
                                       start_cursor=start_cursor,
                                       keys_only=keys_only)
            ]
            if count_limit:
                futures.append(query.count_async(count_limit))
            query_duration = timing.WallTimeLogger('query_duration')
            with query_duration:
                yield futures
            results, start_cursor, more = futures[0].get_result()
            if count_limit:
                count = futures[1].get_result()
            else:
                count = len(results)
            logging.info('query_results_count=%d', len(results))
            if results:
                logging.info('duration_per_result=%f',
                             query_duration.seconds / len(results))
            if post_filters:
                results = [
                    alert for alert in results if all(
                        post_filter(alert) for post_filter in post_filters)
                ]
            if not more:
                start_cursor = None
            if not start_cursor:
                break
        # NOTE(review): if the deadline expires before the first loop
        # iteration, 'count' is never assigned and this raises NameError —
        # confirm deadline_seconds is always positive for all callers.
        raise ndb.Return((results, start_cursor, count))

    @classmethod
    def _InequalityFilters(cls, query, inequality_property, min_end_revision,
                           max_end_revision, min_start_revision,
                           max_start_revision, min_timestamp, max_timestamp):
        """Applies at most one inequality filter; returns in-memory filters.

        Returns:
          A tuple (query, post_filters) where post_filters is a list of
          predicates that must be applied to fetched entities in memory.
        """
        # A query cannot have more than one inequality filter.
        # inequality_property allows users to decide which property to filter in the
        # query, which can significantly affect performance. If other inequalities
        # are specified, they will be handled by post_filters.

        # If callers set inequality_property without actually specifying a
        # corresponding inequality filter, then reset the inequality_property and
        # compute it automatically as if it were not specified.
        if inequality_property == 'start_revision':
            if min_start_revision is None and max_start_revision is None:
                inequality_property = None
        elif inequality_property == 'end_revision':
            if min_end_revision is None and max_end_revision is None:
                inequality_property = None
        elif inequality_property == 'timestamp':
            if min_timestamp is None and max_timestamp is None:
                inequality_property = None
        elif inequality_property != 'key':
            inequality_property = None

        if inequality_property is None:
            # Compute a default inequality_property.
            if min_start_revision or max_start_revision:
                inequality_property = 'start_revision'
            elif min_end_revision or max_end_revision:
                inequality_property = 'end_revision'
            elif min_timestamp or max_timestamp:
                inequality_property = 'timestamp'

        post_filters = []
        if not inequality_property:
            return query, post_filters

        if min_start_revision:
            min_start_revision = int(min_start_revision)
            if inequality_property == 'start_revision':
                logging.info('filter:min_start_revision=%d',
                             min_start_revision)
                query = query.filter(cls.start_revision >= min_start_revision)
                query = query.order(cls.start_revision)
            else:
                post_filters.append(
                    lambda a: a.start_revision >= min_start_revision)

        if max_start_revision:
            max_start_revision = int(max_start_revision)
            if inequality_property == 'start_revision':
                logging.info('filter:max_start_revision=%d',
                             max_start_revision)
                query = query.filter(cls.start_revision <= max_start_revision)
                query = query.order(-cls.start_revision)
            else:
                post_filters.append(
                    lambda a: a.start_revision <= max_start_revision)

        if min_end_revision:
            min_end_revision = int(min_end_revision)
            if inequality_property == 'end_revision':
                logging.info('filter:min_end_revision=%d', min_end_revision)
                query = query.filter(cls.end_revision >= min_end_revision)
                query = query.order(cls.end_revision)
            else:
                post_filters.append(
                    lambda a: a.end_revision >= min_end_revision)

        if max_end_revision:
            max_end_revision = int(max_end_revision)
            if inequality_property == 'end_revision':
                logging.info('filter:max_end_revision=%d', max_end_revision)
                query = query.filter(cls.end_revision <= max_end_revision)
                query = query.order(-cls.end_revision)
            else:
                post_filters.append(
                    lambda a: a.end_revision <= max_end_revision)

        if min_timestamp:
            if inequality_property == 'timestamp':
                logging.info('filter:min_timestamp=%d',
                             time.mktime(min_timestamp.utctimetuple()))
                query = query.filter(cls.timestamp >= min_timestamp)
            else:
                post_filters.append(lambda a: a.timestamp >= min_timestamp)

        if max_timestamp:
            if inequality_property == 'timestamp':
                logging.info('filter:max_timestamp=%d',
                             time.mktime(max_timestamp.utctimetuple()))
                query = query.filter(cls.timestamp <= max_timestamp)
            else:
                post_filters.append(lambda a: a.timestamp <= max_timestamp)

        return query, post_filters
Beispiel #17
0
def _ListSubTestCacheKey(test_key):
  """Builds the cache key under which a suite's sub-test list is stored."""
  master, bot, suite = utils.TestPath(test_key).split('/')[:3]
  return graph_data.LIST_TESTS_SUBTEST_CACHE_KEY % (master, bot, suite)
Beispiel #18
0
def _ValidateTestPatterns(old_pattern, new_pattern):
    """Checks that every test matching old_pattern maps to a valid new path."""
    matching = list_tests.GetTestsMatchingPattern(old_pattern,
                                                  list_entities=True)
    for entity in matching:
        # Raises (via the helper) if the mapping would be invalid.
        _ValidateAndGetNewTestPath(utils.TestPath(entity.key), new_pattern)
Beispiel #19
0
def GetBotNamesFromAlerts(alerts):
    """Gets a set with the names of the bots related to some alerts."""
    # a.test is the key of a TestMetadata entity whose path looks like
    # master_name/bot_name/test_suite_name/metric..., so the bot name is
    # the second path component.
    bot_names = set()
    for alert in alerts:
        bot_names.add(utils.TestPath(alert.test).split('/')[1])
    return bot_names
Beispiel #20
0
 def GetRefTestPath(self):
     """Returns the test path of the ref build, or None if there is none."""
     return utils.TestPath(self.ref_test) if self.ref_test else None
Beispiel #21
0
 def testTestPath_Test(self):
     """Old-style nested Test keys should convert to slash-joined paths."""
     nested_key = ndb.Key('Master', 'm', 'Bot', 'b', 'Test', 'suite', 'Test',
                          'metric')
     self.assertEqual('m/b/suite/metric', utils.TestPath(nested_key))
Beispiel #22
0
class Anomaly(internal_only_model.InternalOnlyModel):
    """Represents a change-point or step found in the data series for a test.

  An Anomaly can be an upward or downward change, and can represent an
  improvement or a regression.
  """
    # Whether the alert should only be viewable by internal users.
    internal_only = ndb.BooleanProperty(indexed=True, default=False)

    # The time the alert fired.
    timestamp = ndb.DateTimeProperty(indexed=True, auto_now_add=True)

    # Note: -1 denotes an invalid alert and -2 an ignored alert.
    # By default, this is None, which denotes a non-triaged alert.
    bug_id = ndb.IntegerProperty(indexed=True)

    # The sheriff rotation that should handle this alert.
    sheriff = ndb.KeyProperty(kind=sheriff_module.Sheriff, indexed=True)

    # Each Alert is related to one Test.
    test = ndb.KeyProperty(indexed=True)

    # We'd like to be able to query Alerts by Master, Bot, and Benchmark names.
    # Derived from the first three test path components (master/bot/benchmark).
    master_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[0], indexed=True)
    bot_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[1], indexed=True)
    benchmark_name = ndb.ComputedProperty(
        lambda self: utils.TestPath(self.test).split('/')[2], indexed=True)

    # Each Alert has a revision range it's associated with; however,
    # start_revision and end_revision could be the same.
    start_revision = ndb.IntegerProperty(indexed=True)
    end_revision = ndb.IntegerProperty(indexed=True)

    # The group this alert belongs to.
    # TODO(qyearsley): If the old AnomalyGroup entities can be removed and
    # all recent groups have the kind AlertGroup, then the optional argument
    # kind=alert_group.AlertGroup can be added.
    group = ndb.KeyProperty(indexed=True)

    # The revisions to use for display, if different than point id.
    display_start = ndb.IntegerProperty(indexed=False)
    display_end = ndb.IntegerProperty(indexed=False)

    # Ownership data, mapping e-mails to the benchmark's owners' emails and
    # component as the benchmark's Monorail component
    ownership = ndb.JsonProperty()

    # The number of points before and after this anomaly that were looked at
    # when finding this anomaly.
    segment_size_before = ndb.IntegerProperty(indexed=False)
    segment_size_after = ndb.IntegerProperty(indexed=False)

    # The medians of the segments before and after the anomaly.
    median_before_anomaly = ndb.FloatProperty(indexed=False)
    median_after_anomaly = ndb.FloatProperty(indexed=False)

    # The standard deviation of the segments before the anomaly.
    std_dev_before_anomaly = ndb.FloatProperty(indexed=False)

    # The number of points that were used in the before/after segments.
    # This is also returned by FindAnomalies.
    window_end_revision = ndb.IntegerProperty(indexed=False)

    # In order to estimate how likely it is that this anomaly is due to noise,
    # t-test may be performed on the points before and after. The t-statistic,
    # degrees of freedom, and p-value are potentially-useful intermediary results.
    t_statistic = ndb.FloatProperty(indexed=False)
    degrees_of_freedom = ndb.FloatProperty(indexed=False)
    p_value = ndb.FloatProperty(indexed=False)

    # Whether this anomaly represents an improvement; if false, this anomaly is
    # considered to be a regression.
    is_improvement = ndb.BooleanProperty(indexed=True, default=False)

    # Whether this anomaly recovered (i.e. if this is a step down, whether there
    # is a corresponding step up later on, or vice versa.)
    recovered = ndb.BooleanProperty(indexed=True, default=False)

    # If the TestMetadata alerted upon has a ref build, store the ref build.
    ref_test = ndb.KeyProperty(indexed=False)

    # The corresponding units from the TestMetaData entity.
    units = ndb.StringProperty(indexed=False)

    # Keys / ids of bisect jobs associated with this anomaly.
    recipe_bisects = ndb.KeyProperty(repeated=True, indexed=False)
    pinpoint_bisects = ndb.StringProperty(repeated=True, indexed=False)

    @property
    def percent_changed(self):
        """The percent change from before the anomaly to after."""
        # Avoid division by zero; treat the change as effectively infinite.
        if self.median_before_anomaly == 0.0:
            return sys.float_info.max
        difference = self.median_after_anomaly - self.median_before_anomaly
        return 100 * difference / self.median_before_anomaly

    @property
    def absolute_delta(self):
        """The absolute change from before the anomaly to after."""
        return self.median_after_anomaly - self.median_before_anomaly

    @property
    def direction(self):
        """Whether the change is numerically an increase or decrease."""
        if self.median_before_anomaly < self.median_after_anomaly:
            return UP
        return DOWN

    def GetDisplayPercentChanged(self):
        """Gets a string showing the percent change."""
        # float_info.max is the sentinel for a division-by-zero change above.
        if abs(self.percent_changed) == sys.float_info.max:
            return FREAKIN_HUGE
        else:
            return '%.1f%%' % abs(self.percent_changed)

    def GetDisplayAbsoluteChanged(self):
        """Gets a string showing the absolute change."""
        if abs(self.absolute_delta) == sys.float_info.max:
            return FREAKIN_HUGE
        else:
            return '%f' % abs(self.absolute_delta)

    def GetRefTestPath(self):
        """Returns the test path of the ref build, or None if there is none."""
        if not self.ref_test:
            return None
        return utils.TestPath(self.ref_test)

    def SetIsImprovement(self, test=None):
        """Sets whether the alert is an improvement for the given test."""
        if not test:
            test = self.GetTestMetadataKey().get()
        # |self.direction| is never equal to |UNKNOWN| (see the definition above)
        # so when the test improvement direction is |UNKNOWN|, |self.is_improvement|
        # will be False.
        self.is_improvement = (self.direction == test.improvement_direction)

    def GetTestMetadataKey(self):
        """Get the key for the TestMetadata entity of this alert.

    We are in the process of converting from Test entities to TestMetadata.
    Until this is done, it's possible that an alert may store either Test
    or TestMetadata in the 'test' KeyProperty. This gets the TestMetadata key
    regardless of what's stored.
    """
        return utils.TestMetadataKey(self.test)

    @classmethod
    @ndb.synctasklet
    def GetAlertsForTest(cls, test_key, limit=None):
        """Synchronous wrapper around GetAlertsForTestAsync."""
        result = yield cls.GetAlertsForTestAsync(test_key, limit=limit)
        raise ndb.Return(result)

    @classmethod
    @ndb.tasklet
    def GetAlertsForTestAsync(cls, test_key, limit=None):
        """Fetches alerts whose test matches either key form of test_key.

        Matches both the TestMetadata key and the old-style Test key so that
        alerts written before the key migration are still found.
        """
        result = yield cls.query(
            cls.test.IN([
                utils.TestMetadataKey(test_key),
                utils.OldStyleTestKey(test_key)
            ])).fetch_async(limit=limit)
        raise ndb.Return(result)
Beispiel #23
0
 def testTestPath_Container(self):
     """TestContainer keys already carry the full test path as their id."""
     container_key = ndb.Key('TestContainer', 'm/b/suite/metric')
     self.assertEqual('m/b/suite/metric', utils.TestPath(container_key))
def _UpdateDescriptor(test_suite,
                      namespace,
                      start_cursor=None,
                      measurements=(),
                      bots=(),
                      cases=(),
                      case_tags=None):
    """Collects the descriptor (measurements/bots/cases/tags) for a suite.

    Iterates over all test keys for |test_suite|, accumulating the set of
    measurements, bots, and test cases. If the taskqueue deadline is reached
    before the iterator is exhausted, re-defers itself with the query cursor
    and the state accumulated so far; otherwise stores the assembled
    descriptor dict in the namespaced cache.

    Args:
      test_suite: Name of the test suite to describe.
      namespace: The datastore_hooks namespace to store results under.
      start_cursor: Optional datastore cursor to resume a prior run.
      measurements: Measurements accumulated by prior runs of this task.
      bots: Bots accumulated by prior runs of this task.
      cases: Test cases accumulated by prior runs of this task.
      case_tags: Dict mapping tag name to a list of test cases, accumulated
          by prior runs of this task.
    """
    logging.info('%s %s %d %d %d', test_suite, namespace, len(measurements),
                 len(bots), len(cases))

    # This function always runs in the taskqueue as an anonymous user.
    if namespace == datastore_hooks.INTERNAL:
        datastore_hooks.SetPrivilegedRequest()

    start = time.time()
    deadline = start + DEADLINE_SECONDS
    key_count = 0
    measurements = set(measurements)
    bots = set(bots)
    cases = set(cases)
    case_tags = case_tags or {}

    # Some test suites have more keys than can fit in memory or can be processed
    # in 10 minutes, so use an iterator instead of a page limit.
    query_iter = _QueryTestSuite(test_suite).iter(keys_only=True,
                                                  produce_cursors=True,
                                                  start_cursor=start_cursor,
                                                  use_cache=False,
                                                  use_memcache=False)
    tags_futures = []

    try:
        for key in query_iter:
            test_path = utils.TestPath(key)
            key_count += 1
            desc = descriptor.Descriptor.FromTestPathSync(test_path)
            bots.add(desc.bot)
            if desc.measurement:
                measurements.add(desc.measurement)
            if desc.test_case:
                if desc.test_case not in cases:
                    cases.add(desc.test_case)
                    tags_futures.append(
                        _QueryCaseTags(test_path, desc.test_case))
            if time.time() > deadline:
                break
    except db.BadRequestError:
        # Raised when the query cursor expires; treat it like hitting the
        # deadline and continue from the cursor in a deferred task.
        pass

    _CollectCaseTags(tags_futures, case_tags)

    logging.info('%d keys, %d measurements, %d bots, %d cases, %d tags',
                 key_count, len(measurements), len(bots), len(cases),
                 len(case_tags))
    if key_count:
        logging.info('per_key:wall_us=%f',
                     round(1e6 * (time.time() - start) / key_count))

    if query_iter.probably_has_next():
        logging.info('continuing')
        deferred.defer(_UpdateDescriptor, test_suite, namespace,
                       query_iter.cursor_before(), measurements, bots, cases,
                       case_tags)
        return

    # sorted() already returns a list, so no extra list() wrapper is needed.
    # The tag->cases comprehension uses its own variable name to avoid
    # shadowing the |cases| set accumulated above.
    desc = {
        'measurements': sorted(measurements),
        'bots': sorted(bots),
        'cases': sorted(cases),
        'caseTags': {tag: sorted(tagged_cases)
                     for tag, tagged_cases in case_tags.items()}
    }

    key = namespaced_stored_object.NamespaceKey(CacheKey(test_suite),
                                                namespace)
    stored_object.Set(key, desc)
Beispiel #25
0
def GetAnomalyDict(anomaly_entity, bisect_status=None, v2=False):
    """Returns a dictionary for an Anomaly which can be encoded as JSON.

  Args:
    anomaly_entity: An Anomaly entity.
    bisect_status: String status of bisect run.
    v2: Whether to emit the v2 dict shape (with descriptor, bug labels and
        components) instead of the legacy alerts-page shape.

  Returns:
    A dictionary which is safe to be encoded as JSON.
  """
    test_key = anomaly_entity.GetTestMetadataKey()
    test_path = utils.TestPath(test_key)
    dashboard_link = email_template.GetReportPageLink(
        test_path,
        rev=anomaly_entity.end_revision,
        add_protocol_and_host=False)

    # Fields common to both the v2 and legacy shapes.
    dct = {
        'bug_id': anomaly_entity.bug_id,
        'dashboard_link': dashboard_link,
        'end_revision': anomaly_entity.end_revision,
        'improvement': anomaly_entity.is_improvement,
        'key': anomaly_entity.key.urlsafe(),
        'median_after_anomaly': anomaly_entity.median_after_anomaly,
        'median_before_anomaly': anomaly_entity.median_before_anomaly,
        'recovered': anomaly_entity.recovered,
        'start_revision': anomaly_entity.start_revision,
        'units': anomaly_entity.units,
    }

    if v2:
        bug_labels = set()
        if anomaly_entity.internal_only:
            bug_labels.add('Restrict-View-Google')
        tags = set(bug_label_patterns.GetBugLabelsForTest(test_key))
        subscriptions = list(anomaly_entity.subscriptions)
        tags.update([l for s in subscriptions for l in s.bug_labels])
        bug_components = set(c for s in subscriptions
                             for c in s.bug_components)
        # 'Cr-Foo-Bar' labels are Monorail shorthand for component 'Foo>Bar'.
        for tag in tags:
            if tag.startswith('Cr-'):
                bug_components.add(tag.replace('Cr-', '').replace('-', '>'))
            else:
                bug_labels.add(tag)

        dct['bug_components'] = list(bug_components)
        dct['bug_labels'] = list(bug_labels)

        desc = descriptor.Descriptor.FromTestPathSync(test_path)
        dct['descriptor'] = {
            'testSuite': desc.test_suite,
            'measurement': desc.measurement,
            'bot': desc.bot,
            'testCase': desc.test_case,
            'statistic': desc.statistic,
        }
        dct['pinpoint_bisects'] = anomaly_entity.pinpoint_bisects
    else:
        test_path_parts = test_path.split('/')
        dct['absolute_delta'] = '%s' % anomaly_entity.GetDisplayAbsoluteChanged(
        )
        dct['bisect_status'] = bisect_status
        dct['bot'] = test_path_parts[1]
        dct['date'] = str(anomaly_entity.timestamp.date())
        dct['display_end'] = anomaly_entity.display_end
        dct['display_start'] = anomaly_entity.display_start
        dct['master'] = test_path_parts[0]
        dct['percent_changed'] = '%s' % anomaly_entity.GetDisplayPercentChanged(
        )
        dct['ref_test'] = anomaly_entity.GetRefTestPath()
        dct['test'] = '/'.join(test_path_parts[3:])
        dct['testsuite'] = test_path_parts[2]
        dct['timestamp'] = anomaly_entity.timestamp.isoformat()
        dct['type'] = 'anomaly'

    return dct
Beispiel #26
0
    def get(self):
        """Gets the page for viewing recently added points.

    Request parameters:
      pattern: A test path pattern with asterisk wildcards (optional).
      max_tests: Maximum number of matching tests to query (optional).
      num_points: Maximum number of Row entities to show (optional).

    Outputs:
      A page showing recently added points.
    """
        # Construct a query for recently added Row entities.
        query = graph_data.Row.query()
        query = query.order(-graph_data.Row.timestamp)

        # If a maximum number of tests was specified, use it; fall back on default.
        try:
            max_tests = int(self.request.get('max_tests', _MAX_MATCHING_TESTS))
        except ValueError:
            max_tests = _MAX_MATCHING_TESTS

        # If a test path pattern was specified, filter the query to include only
        # Row entities that belong to a test that matches the pattern.
        test_pattern = self.request.get('pattern')
        num_originally_matching_tests = 0
        if test_pattern:
            test_paths = list_tests.GetTestsMatchingPattern(
                test_pattern, only_with_rows=True)
            if not test_paths:
                self.RenderHtml(
                    'new_points.html', {
                        'pattern': test_pattern,
                        'error':
                        'No tests matching pattern: %s' % test_pattern,
                    })
                return

            # If test_keys contains too many tests, then this query will exceed a
            # memory limit or time out. So, limit the number of tests and let the
            # user know that this has happened.
            num_originally_matching_tests = len(test_paths)
            if num_originally_matching_tests > max_tests:
                test_paths = test_paths[:max_tests]
            # Materialize the keys as a list: under Python 3, map() returns a
            # one-shot iterator, which is not safe to hand to Query.IN.
            test_keys = [utils.OldStyleTestKey(path) for path in test_paths]
            query = query.filter(graph_data.Row.parent_test.IN(test_keys))

        # If a valid number of points was given, use it. Otherwise use the default.
        try:
            num_points = int(
                self.request.get('num_points', _DEFAULT_NUM_POINTS))
        except ValueError:
            num_points = _DEFAULT_NUM_POINTS

        # Fetch the Row entities.
        rows = query.fetch(limit=num_points)

        # Make a list of dicts which will be passed to the template.
        row_dicts = []
        for row in rows:
            row_dicts.append({
                'test':
                utils.TestPath(row.parent_test),
                'added_time':
                row.timestamp.strftime('%Y-%m-%d %H:%M:%S %Z'),
                'revision':
                row.revision,
                'value':
                row.value,
                'error':
                row.error,
            })

        error_message = ''
        if num_originally_matching_tests > max_tests:
            error_message = (
                'Pattern originally matched %s tests; only showing '
                'points from the first %s tests.' %
                (num_originally_matching_tests, max_tests))

        # Render the template with the row information that was fetched.
        self.RenderHtml(
            'new_points.html', {
                'pattern': test_pattern,
                'num_points': num_points,
                'max_tests': max_tests,
                'rows': row_dicts,
                'error': error_message,
            })