Exemple #1
0
    def _FetchRowsForTest(self, test_key):
        """Fetches the Rows of one test and copies them into per-revision data.

        Written in generator style: asynchronous operations are yielded and the
        generator is resumed with their results (presumably it is driven as an
        ndb tasklet; the decorator is not visible here -- TODO confirm).

        Args:
          test_key: key of the test; Rows are selected by
            `Row.parent_test == test_key`.
        """
        test_desc = yield descriptor.Descriptor.FromTestPathAsync(
            utils.TestPath(test_key))
        projection, limit = self._RowQueryProjection(test_desc.statistic)
        query = graph_data.Row.query(projection=projection)
        query = query.filter(graph_data.Row.parent_test == test_key)
        query = self._FilterRowQuery(query)

        with timing.WallTimeLogger('fetch_test'):
            rows = yield query.fetch_async(limit)

        with timing.CpuTimeLogger('rows'):
            for row in rows:
                # Sometimes the dev environment just ignores some filters.
                if self._min_revision and row.revision < self._min_revision:
                    continue
                if self._min_timestamp and row.timestamp < self._min_timestamp:
                    continue
                if self._max_revision and row.revision > self._max_revision:
                    continue
                if self._max_timestamp and row.timestamp > self._max_timestamp:
                    continue

                datum = self._Datum(row.revision)
                if test_desc.statistic is None:
                    # An unsuffixed test stores its value as 'avg' and, when
                    # present and truthy, its error as 'std'.
                    datum['avg'] = self.Round(row.value)
                    # hasattr (not getattr with a default) is kept on purpose:
                    # it tolerates any failure raised while probing the row.
                    if hasattr(row, 'error') and row.error:
                        datum['std'] = self.Round(row.error)
                else:
                    datum[test_desc.statistic] = self.Round(row.value)

                for stat in self._statistic_columns:
                    if hasattr(row, 'd_' + stat):
                        datum[stat] = self.Round(getattr(row, 'd_' + stat))

                if 'timestamp' in self._columns:
                    datum['timestamp'] = row.timestamp.isoformat()

                wants_revisions = 'revisions' in self._columns
                wants_annotations = 'annotations' in self._columns
                if wants_revisions or wants_annotations:
                    # Serialize the row once and reuse it for both columns.
                    row_dict = row.to_dict()
                    if wants_revisions:
                        datum['revisions'] = {
                            attr: value
                            for attr, value in row_dict.items()
                            if attr.startswith('r_')
                        }
                    if wants_annotations:
                        datum['annotations'] = {
                            attr: value
                            for attr, value in row_dict.items()
                            if attr.startswith('a_')
                        }

        if 'histogram' in self._columns and test_desc.statistic is None:
            with timing.WallTimeLogger('fetch_histograms'):
                # Histograms are requested for every fetched row, including
                # rows skipped by the revision/timestamp checks above.
                yield [
                    self._FetchHistogram(test_key, row.revision)
                    for row in rows
                ]
Exemple #2
0
def _LoadHistogramList(input_file):
  """Incremental file decoding and JSON parsing when handling new histograms.

  This helper function takes an input file which yields fragments of JSON
  encoded histograms then incrementally builds the list of histograms to return
  the fully formed list in the end.

  Args:
    input_file: the file-like object handed to `ijson.items`; each element
      under the 'item' prefix becomes one entry of the result.

  Returns:
    A list of the objects decoded from input_file, with every decimal.Decimal
    found in them (at any depth of dicts and lists) converted to float.

  Raises:
    ValueError: if ijson reports a JSONError while parsing the file.
  """

  def NormalizeDecimals(obj):
    # Traverse every object in obj to turn Decimal objects into floats.
    if isinstance(obj, decimal.Decimal):
      return float(obj)
    if isinstance(obj, dict):
      # Only values are replaced, so the key set is stable while iterating.
      # `.items()` (rather than `.iteritems()`) works on Python 2 and 3.
      for k, v in obj.items():
        obj[k] = NormalizeDecimals(v)
    elif isinstance(obj, list):
      obj = [NormalizeDecimals(x) for x in obj]
    return obj

  try:
    with timing.WallTimeLogger('json.load'):
      objects = [NormalizeDecimals(x) for x in ijson.items(input_file, 'item')]
  except ijson.JSONError as e:
    # Wrap exception in a ValueError
    raise ValueError('Failed to parse JSON: %s' % (e))

  return objects
Exemple #3
0
 def _FetchHistogramsForTest(self, test):
   """Fetches a histogram for each Row key of `test` that passes the filters.

   Generator-style: yields the key-only row query, then the list of
   per-row `_FetchHistogram` results, all inside one wall-time measurement.
   """
   row_query = self._FilterRowQuery(
       graph_data.Row.query(graph_data.Row.parent_test == test))
   with timing.WallTimeLogger('fetch_histograms'):
     keys = yield row_query.fetch_async(
         HISTOGRAMS_QUERY_LIMIT, keys_only=True)
     pending = []
     for key in keys:
       # The integer id of the row key is what _FetchHistogram receives.
       pending.append(self._FetchHistogram(test, key.integer_id()))
     yield pending
    def AuthorizedPost(self):
        """Stores the uploaded payload in cloud storage and queues processing.

        The request body is used unchanged when it is valid zlib data.
        Otherwise the "data" request parameter is read and compressed. The
        compressed payload is written to a uniquely named file under
        /add-histograms-cache/ and a task for /add_histograms/process is
        added to the default queue with that path as its JSON payload.

        Raises:
          api_request_handler.BadRequestError: if the body is not zlib data
            and the "data" parameter is missing or empty.
        """
        datastore_hooks.SetPrivilegedRequest()

        with timing.WallTimeLogger('decompress'):
            try:
                data_str = self.request.body
                # The result is discarded; this only validates that the body
                # is a complete zlib stream (an empty body raises zlib.error).
                zlib.decompress(data_str)
                logging.info('Recieved compressed data.')
            except zlib.error:
                data_str = self.request.get('data')
                # Validate before compressing: zlib.compress('') yields a
                # non-empty string, so a check made afterwards can never
                # detect the missing parameter.
                if not data_str:
                    raise api_request_handler.BadRequestError(
                        'Missing "data" parameter')
                data_str = zlib.compress(data_str)
                logging.info('Recieved uncompressed data.')

        filename = uuid.uuid4()
        params = {'gcs_file_path': '/add-histograms-cache/%s' % filename}

        gcs_file = cloudstorage.open(params['gcs_file_path'],
                                     'w',
                                     content_type='application/octet-stream',
                                     retry_params=_RETRY_PARAMS)
        gcs_file.write(data_str)
        gcs_file.close()

        retry_options = taskqueue.TaskRetryOptions(
            task_retry_limit=_TASK_RETRY_LIMIT)
        queue = taskqueue.Queue('default')
        queue.add(
            taskqueue.Task(url='/add_histograms/process',
                           payload=json.dumps(params),
                           retry_options=retry_options))
Exemple #5
0
def GetReport(template_id, revisions):
    """Builds the report dict for a stored or static report template.

    Args:
      template_id: id of a 'ReportTemplate' entity, or the key id of one of
        the templates returned by ListStaticTemplates().
      revisions: passed through to the report query / static handler.

    Returns:
      A dict with 'editable', 'report', 'id', 'name' and 'internal' (plus
      'owners' for stored templates), or None when the template lookup
      raises AssertionError or no template matches template_id.
    """
    with timing.WallTimeLogger('GetReport'), timing.CpuTimeLogger('GetReport'):
        try:
            template = ndb.Key('ReportTemplate', template_id).get()
        except AssertionError:
            # InternalOnlyModel._post_get_hook asserts that the user can access
            # the entity.
            return None

        result = {'editable': False}
        if not template:
            # No stored template: look for a static one with the same id.
            for handler in ListStaticTemplates():
                if handler.template.key.id() == template_id:
                    template = handler.template
                    static_report = handler(revisions)
                    if isinstance(static_report, report_query.ReportQuery):
                        static_report = static_report.FetchSync()
                    result['report'] = static_report
                    break
            if template is None:
                return None
        else:
            owners = template.owners
            result['owners'] = owners
            result['editable'] = utils.GetEmail() in template.owners
            stored_query = report_query.ReportQuery(template.template,
                                                    revisions)
            result['report'] = stored_query.FetchSync()

        result['id'] = template.key.id()
        result['name'] = template.name
        result['internal'] = template.internal_only
        return result
Exemple #6
0
  def _FetchTests(self):
    """Loads the tests in self._test_keys and derives units and direction.

    Generator-style: yields the asynchronous gets and descriptor lookups.
    Sets self._private when any test is internal-only, and sets self._units
    and self._improvement_direction ('down', 'up' or None).

    Raises:
      api_request_handler.NotFoundError: if none of the keys resolve to a
        test.
    """
    with timing.WallTimeLogger('fetch_tests'):
      fetched = yield [key.get_async() for key in self._test_keys]

    tests = []
    for candidate in fetched:
      if candidate:
        tests.append(candidate)
    if not tests:
      raise api_request_handler.NotFoundError

    improvement_direction = None
    for test in tests:
      if test.internal_only:
        self._private = True

      test_desc = yield descriptor.Descriptor.FromTestPathAsync(
          utils.TestPath(test.key))
      # The unit for 'count' statistics is trivially always 'count'. Callers
      # certainly want the units of the measurement, which is the same as the
      # units of the 'avg' and 'std' statistics. So once units are known, a
      # 'count' test must not overwrite them.
      if self._units is not None and test_desc.statistic == 'count':
        continue
      self._units = test.units
      improvement_direction = test.improvement_direction

    direction_name = None
    if improvement_direction == anomaly.DOWN:
      direction_name = 'down'
    elif improvement_direction == anomaly.UP:
      direction_name = 'up'
    self._improvement_direction = direction_name
Exemple #7
0
    def Post(self):
        """Accepts a histogram upload and schedules it for processing.

        On dev_appserver the body is parsed and processed inline. Otherwise
        the compressed payload is written to cloud storage and a task for
        /add_histograms/process is queued.

        Returns:
          On dev_appserver, the token info of a freshly created upload
          completion token. Otherwise the token info when the upload
          completion token experiment is on, else None.

        Raises:
          api_request_handler.BadRequestError: if no usable payload is found.
        """
        if utils.IsDevAppserver():
            # Don't require developers to zip the body.
            # In prod, the data will be written to cloud storage and processed
            # on the taskqueue, so the caller will not see any errors. In
            # dev_appserver, process the data immediately so the caller will
            # see errors.
            # Also always create upload completion token for such requests.
            token, token_info = self._CreateUploadCompletionToken()
            body_stream = StringIO.StringIO(self.request.body)
            ProcessHistogramSet(_LoadHistogramList(body_stream), token)
            state_update = token.UpdateStateAsync(
                upload_completion_token.State.COMPLETED)
            state_update.wait()
            return token_info

        with timing.WallTimeLogger('decompress'):
            compressed = self.request.body
            try:
                # Try to decompress at most 100 bytes from the data, only to
                # determine if we've been given compressed payload.
                zlib.decompressobj().decompress(compressed, 100)
            except zlib.error:
                raw_data = self.request.get('data')
                if not raw_data:
                    raise api_request_handler.BadRequestError(
                        'Missing or uncompressed data.')
                compressed = zlib.compress(raw_data)
                logging.info('Received uncompressed data.')
            else:
                logging.info('Received compressed data.')

        if not compressed:
            raise api_request_handler.BadRequestError(
                'Missing "data" parameter')

        gcs_path = '/add-histograms-cache/%s' % uuid.uuid4()
        params = {'gcs_file_path': gcs_path}

        gcs_file = cloudstorage.open(gcs_path,
                                     'w',
                                     content_type='application/octet-stream',
                                     retry_params=_RETRY_PARAMS)
        gcs_file.write(compressed)
        gcs_file.close()

        token_info = None
        if utils.ShouldTurnOnUploadCompletionTokenExperiment():
            # Only the token info is needed here; the token travels to the
            # task through the params payload.
            token_info = self._CreateUploadCompletionToken(gcs_path)[1]
            params['upload_completion_token'] = token_info['token']

        retry_options = taskqueue.TaskRetryOptions(
            task_retry_limit=_TASK_RETRY_LIMIT)
        queue = taskqueue.Queue('default')
        process_task = taskqueue.Task(url='/add_histograms/process',
                                      payload=json.dumps(params),
                                      retry_options=retry_options)
        queue.add(process_task)
        return token_info
    def AuthorizedPost(self):
        """Decodes the uploaded histogram JSON and processes it inline.

        The body is zlib-decompressed when possible; otherwise the "data"
        request parameter is used as-is.

        Raises:
          api_request_handler.BadRequestError: if no payload is found.
        """
        datastore_hooks.SetPrivilegedRequest()

        with timing.WallTimeLogger('decompress'):
            try:
                payload = zlib.decompress(self.request.body)
            except zlib.error:
                payload = self.request.get('data')
                logging.info('Recieved uncompressed data.')
            else:
                logging.info('Recieved compressed data.')
        if not payload:
            raise api_request_handler.BadRequestError(
                'Missing "data" parameter')

        # Only the first 200 characters are logged.
        preview = payload[:200]
        logging.info('Received data: %s', preview)

        with timing.WallTimeLogger('json.loads'):
            parsed = json.loads(payload)
        ProcessHistogramSet(parsed)
Exemple #9
0
def List():
    """Returns id/name/modified summaries of all templates, sorted by name.

    Stored ReportTemplate entities come first in the input, followed by the
    templates of ListStaticTemplates(); the sort is by the 'name' field.
    """

    def _Name(summary):
        return summary['name']

    with timing.WallTimeLogger('List'), timing.CpuTimeLogger('List'):
        all_templates = ReportTemplate.query().fetch()
        all_templates += [
            handler.template for handler in ListStaticTemplates()
        ]
        summaries = []
        for template in all_templates:
            summaries.append({
                'id': template.key.id(),
                'name': template.name,
                'modified': template.modified.isoformat(),
            })
        return sorted(summaries, key=_Name)
Exemple #10
0
def _LoadTaskGraph(job):
    """Loads every Task under `job` and identifies the terminal ones.

    Returns:
      A ReconstitutedTaskGraph whose `tasks` maps task key to task and whose
      `terminal_tasks` lists the keys of tasks nothing depends on.
    """
    with timing.WallTimeLogger('ExecutionEngine:_LoadTaskGraph'):
        job_tasks = Task.query(ancestor=job.key).fetch()

        # A task is terminal when no other task lists it as a dependency, so
        # first collect every key that appears in some dependency list.
        depended_upon = set()
        for candidate in job_tasks:
            depended_upon.update(candidate.dependencies)

        terminal_keys = []
        for candidate in job_tasks:
            if candidate.key not in depended_upon:
                terminal_keys.append(candidate.key)

        tasks_by_key = {}
        for candidate in job_tasks:
            tasks_by_key[candidate.key] = candidate

        return ReconstitutedTaskGraph(terminal_tasks=terminal_keys,
                                      tasks=tasks_by_key)
    def Post(self):
        """Accepts a histogram upload and schedules it for processing.

        On dev_appserver the body is parsed as JSON and processed inline.
        Otherwise the compressed payload is written to cloud storage and a
        task for /add_histograms/process is queued.

        Raises:
          api_request_handler.BadRequestError: if no usable payload is found.
        """
        if utils.IsDevAppserver():
            # Don't require developers to zip the body.
            # In prod, the data will be written to cloud storage and processed
            # on the taskqueue, so the caller will not see any errors. In
            # dev_appserver, process the data immediately so the caller will
            # see errors.
            histogram_dicts = json.loads(self.request.body)
            ProcessHistogramSet(histogram_dicts)
            return

        with timing.WallTimeLogger('decompress'):
            compressed = self.request.body
            try:
                # Try to decompress at most 100 bytes from the data, only to
                # determine if we've been given compressed payload.
                zlib.decompressobj().decompress(compressed, 100)
            except zlib.error:
                raw_data = self.request.get('data')
                if not raw_data:
                    raise api_request_handler.BadRequestError(
                        'Missing or uncompressed data.')
                compressed = zlib.compress(raw_data)
                logging.info('Received uncompressed data.')
            else:
                logging.info('Received compressed data.')

        if not compressed:
            raise api_request_handler.BadRequestError(
                'Missing "data" parameter')

        gcs_path = '/add-histograms-cache/%s' % uuid.uuid4()
        params = {'gcs_file_path': gcs_path}

        gcs_file = cloudstorage.open(gcs_path,
                                     'w',
                                     content_type='application/octet-stream',
                                     retry_params=_RETRY_PARAMS)
        gcs_file.write(compressed)
        gcs_file.close()

        retry_options = taskqueue.TaskRetryOptions(
            task_retry_limit=_TASK_RETRY_LIMIT)
        queue = taskqueue.Queue('default')
        process_task = taskqueue.Task(url='/add_histograms/process',
                                      payload=json.dumps(params),
                                      retry_options=retry_options)
        queue.add(process_task)
Exemple #12
0
  def post(self):
    """Processes a histogram upload previously stored in cloud storage.

    The request body is JSON carrying a 'gcs_file_path' entry. That file is
    read, then deleted whether or not reading succeeded, zlib-decompressed,
    parsed as JSON and handed to ProcessHistogramSet. Any exception is logged
    and reported in the response body as {'error': <message>} instead of being
    raised.
    """
    datastore_hooks.SetPrivilegedRequest()

    try:
      params = json.loads(self.request.body)
      gcs_file_path = params['gcs_file_path']

      try:
        gcs_file = cloudstorage.open(
            gcs_file_path, 'r', retry_params=_RETRY_PARAMS)
        try:
          contents = gcs_file.read()
        finally:
          # Release the read handle even when the read fails.
          gcs_file.close()
        data_str = zlib.decompress(contents)
      finally:
        cloudstorage.delete(gcs_file_path, retry_params=_RETRY_PARAMS)

      with timing.WallTimeLogger('json.loads'):
        histogram_dicts = json.loads(data_str)

      ProcessHistogramSet(histogram_dicts)
    except Exception as e: # pylint: disable=broad-except
      # `e.message` does not exist on Python 3 and is empty for exceptions
      # built with several arguments; fall back to str(e) in those cases so
      # the handler itself cannot fail while reporting the error.
      message = getattr(e, 'message', None) or str(e)
      logging.error('Error processing histograms: %r', message)
      self.response.out.write(json.dumps({'error': message}))
Exemple #13
0
def ProcessHistogramSet(histogram_dicts, completion_token=None):
    """Imports a list of histogram dicts and queues per-histogram tasks.

    The steps run in a fixed order and most are individually wall-timed:
    import and deduplicate, validate the suite identity read from the first
    histogram, record the revision, deduplicate suite-level sparse
    diagnostics against stored ones, then create and queue the tasks.

    Args:
      histogram_dicts: list of dicts to import into a HistogramSet.
      completion_token: optional token forwarded to _CreateHistogramTasks.

    Raises:
      api_request_handler.BadRequestError: if histogram_dicts is not a list
        or the imported set contains no histograms. The validation helpers
        called below presumably raise as well -- TODO confirm.
    """
    if not isinstance(histogram_dicts, list):
        raise api_request_handler.BadRequestError(
            'HistogramSet JSON must be a list of dicts')

    histograms = histogram_set.HistogramSet()

    with timing.WallTimeLogger('hs.ImportDicts'):
        histograms.ImportDicts(histogram_dicts)

    with timing.WallTimeLogger('hs.DeduplicateDiagnostics'):
        histograms.DeduplicateDiagnostics()

    if len(histograms) == 0:
        raise api_request_handler.BadRequestError(
            'HistogramSet JSON must contain at least one histogram.')

    with timing.WallTimeLogger('hs._LogDebugInfo'):
        _LogDebugInfo(histograms)

    with timing.WallTimeLogger('InlineDenseSharedDiagnostics'):
        InlineDenseSharedDiagnostics(histograms)

    # TODO(#4242): Get rid of this.
    # https://github.com/catapult-project/catapult/issues/4242
    with timing.WallTimeLogger('_PurgeHistogramBinData'):
        _PurgeHistogramBinData(histograms)

    # The suite identity (master, bot, benchmark) is read from the first
    # histogram only.
    with timing.WallTimeLogger('_GetDiagnosticValue calls'):
        master = _GetDiagnosticValue(reserved_infos.MASTERS.name,
                                     histograms.GetFirstHistogram())
        bot = _GetDiagnosticValue(reserved_infos.BOTS.name,
                                  histograms.GetFirstHistogram())
        benchmark = _GetDiagnosticValue(reserved_infos.BENCHMARKS.name,
                                        histograms.GetFirstHistogram())
        benchmark_description = _GetDiagnosticValue(
            reserved_infos.BENCHMARK_DESCRIPTIONS.name,
            histograms.GetFirstHistogram(),
            optional=True)

    with timing.WallTimeLogger('_ValidateMasterBotBenchmarkName'):
        _ValidateMasterBotBenchmarkName(master, bot, benchmark)

    with timing.WallTimeLogger('ComputeRevision'):
        suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))
        logging.info('Suite: %s', suite_key.id())

        revision = ComputeRevision(histograms)
        logging.info('Revision: %s', revision)

        internal_only = graph_data.Bot.GetInternalOnlySync(master, bot)

    # Record this (suite, revision) pair before asking for the latest record.
    revision_record = histogram.HistogramRevisionRecord.GetOrCreate(
        suite_key, revision)
    revision_record.put()

    last_added = histogram.HistogramRevisionRecord.GetLatest(
        suite_key).get_result()

    # On first upload, a query immediately following a put may return nothing.
    if not last_added:
        last_added = revision_record

    _CheckRequest(last_added, 'No last revision')

    # We'll skip the histogram-level sparse diagnostics because we need to
    # handle those with the histograms, below, so that we can properly assign
    # test paths.
    with timing.WallTimeLogger('FindSuiteLevelSparseDiagnostics'):
        suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
            histograms, suite_key, revision, internal_only)

    # TODO(896856): Refactor master/bot computation to happen above this line
    # so that we can replace with a DiagnosticRef rather than a full diagnostic.
    with timing.WallTimeLogger('DeduplicateAndPut'):
        new_guids_to_old_diagnostics = (
            histogram.SparseDiagnostic.FindOrInsertDiagnostics(
                suite_level_sparse_diagnostic_entities, suite_key, revision,
                last_added.revision).get_result())

    # Swap each newly uploaded shared diagnostic for the one returned by
    # FindOrInsertDiagnostics under its guid.
    with timing.WallTimeLogger('ReplaceSharedDiagnostic calls'):
        for new_guid, old_diagnostic in new_guids_to_old_diagnostics.items():
            histograms.ReplaceSharedDiagnostic(
                new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

    with timing.WallTimeLogger('_CreateHistogramTasks'):
        tasks = _CreateHistogramTasks(suite_key.id(), histograms, revision,
                                      benchmark_description, completion_token)

    with timing.WallTimeLogger('_QueueHistogramTasks'):
        _QueueHistogramTasks(tasks)
Exemple #14
0
    def QueryAsync(cls,
                   bot_name=None,
                   bug_id=None,
                   count_limit=0,
                   deadline_seconds=50,
                   inequality_property=None,
                   is_improvement=None,
                   key=None,
                   keys_only=False,
                   limit=100,
                   master_name=None,
                   max_end_revision=None,
                   max_start_revision=None,
                   max_timestamp=None,
                   min_end_revision=None,
                   min_start_revision=None,
                   min_timestamp=None,
                   recovered=None,
                   subscriptions=None,
                   start_cursor=None,
                   test=None,
                   test_keys=None,
                   test_suite_name=None,
                   project_id=None):
        """Queries alerts matching the filters, chasing cursors when needed.

        Generator-style tasklet: the result is delivered by raising
        `ndb.Return`.

        Args:
          bug_id: '' selects alerts without a bug; '*' is left to the
            inequality filters; any other value is converted with int() and
            matched exactly.
          count_limit: when non-zero, a count query with this limit runs
            alongside the page fetch.
          deadline_seconds: time budget for chasing cursors.
          inequality_property: forwarded to _InequalityFilters; replaced by
            'key' as soon as any equality filter is applied.
          key: urlsafe key; when given, only that entity is fetched and every
            other argument is ignored.
          keys_only: forced to False when post filters must be applied.
          limit: page size for each fetch.
          start_cursor: cursor at which the first page starts.
          test: test path; matched against both its old-style key and its
            TestMetadata key.
          test_keys: additional test keys to match. The list is not modified.
          The remaining arguments are optional equality or range filters.

        Returns (via ndb.Return):
          A (results, start_cursor, count) tuple. start_cursor is None when
          there are no more pages. count is the count query result when
          count_limit is set, otherwise the size of the last fetched page
          before post filters; it is 0 if no page was fetched at all.
        """
        if key:
            # This tasklet isn't allowed to catch the internal_only AssertionError.
            alert = yield ndb.Key(urlsafe=key).get_async()
            raise ndb.Return(([alert], None, 1))

        # post_filters can cause results to be empty, depending on the shape of the
        # data and which filters are applied in the query and which filters are
        # applied after the query. Automatically chase cursors until some results
        # are found, but stay under the request timeout.
        results = []
        # Defined up front so the final ndb.Return cannot hit an unbound name
        # when the deadline has already passed before the first iteration.
        count = 0
        deadline = time.time() + deadline_seconds
        while not results and time.time() < deadline:
            query = cls.query()
            equality_properties = []
            if subscriptions:  # Empty subscriptions is not allowed in query
                query = query.filter(cls.subscription_names.IN(subscriptions))
                equality_properties.append('subscription_names')
                inequality_property = 'key'
            if is_improvement is not None:
                query = query.filter(cls.is_improvement == is_improvement)
                equality_properties.append('is_improvement')
                inequality_property = 'key'
            if bug_id is not None:
                if bug_id == '':
                    # `== None` is the ndb filter syntax, not an identity test.
                    query = query.filter(cls.bug_id == None)
                    equality_properties.append('bug_id')
                    inequality_property = 'key'
                elif bug_id != '*':
                    query = query.filter(cls.bug_id == int(bug_id))
                    equality_properties.append('bug_id')
                    inequality_property = 'key'
                # bug_id='*' translates to bug_id != None, which is handled with the
                # other inequality filters.
            if project_id is not None:
                query = query.filter(cls.project_id == project_id)
                equality_properties.append('project_id')
                inequality_property = 'key'
            if recovered is not None:
                query = query.filter(cls.recovered == recovered)
                equality_properties.append('recovered')
                inequality_property = 'key'
            if test or test_keys:
                # Build a fresh list on every pass: extending `test_keys` in
                # place would mutate the caller's list and append the same
                # two keys again on each cursor-chasing iteration.
                query_test_keys = list(test_keys) if test_keys else []
                if test:
                    query_test_keys += [
                        utils.OldStyleTestKey(test),
                        utils.TestMetadataKey(test)
                    ]
                query = query.filter(cls.test.IN(query_test_keys))
                query = query.order(cls.key)
                equality_properties.append('test')
                inequality_property = 'key'
            if master_name:
                query = query.filter(cls.master_name == master_name)
                equality_properties.append('master_name')
                inequality_property = 'key'
            if bot_name:
                query = query.filter(cls.bot_name == bot_name)
                equality_properties.append('bot_name')
                inequality_property = 'key'
            if test_suite_name:
                query = query.filter(cls.benchmark_name == test_suite_name)
                equality_properties.append('benchmark_name')
                inequality_property = 'key'

            query, post_filters = cls._InequalityFilters(
                query, equality_properties, inequality_property, bug_id,
                min_end_revision, max_end_revision, min_start_revision,
                max_start_revision, min_timestamp, max_timestamp)
            if post_filters:
                # Post filters inspect entity properties, so keys alone are
                # not enough.
                keys_only = False
            query = query.order(-cls.timestamp, cls.key)

            futures = [
                query.fetch_page_async(limit,
                                       start_cursor=start_cursor,
                                       keys_only=keys_only)
            ]
            if count_limit:
                futures.append(query.count_async(count_limit))
            query_duration = timing.WallTimeLogger('query_duration')
            with query_duration:
                yield futures
            results, start_cursor, more = futures[0].get_result()
            if count_limit:
                count = futures[1].get_result()
            else:
                count = len(results)
            logging.info('query_results_count=%d', len(results))
            if results:
                logging.info('duration_per_result=%f',
                             query_duration.seconds / len(results))
            if post_filters:
                results = [
                    alert for alert in results if all(
                        post_filter(alert) for post_filter in post_filters)
                ]
            if not more:
                start_cursor = None
            if not start_cursor:
                break
        raise ndb.Return((results, start_cursor, count))
Exemple #15
0
 def _FetchDiagnostics(self):
   """Fetches diagnostics for every unsuffixed test key, timed as one step.

   Generator-style: yields the list of per-test fetch results.
   """
   with timing.WallTimeLogger('fetch_diagnosticss'):
     pending = []
     for test in self._unsuffixed_test_metadata_keys:
       pending.append(self._FetchDiagnosticsForTest(test))
     yield pending
Exemple #16
0
    def QueryAsync(cls,
                   bot_name=None,
                   bug_id=None,
                   count_limit=0,
                   deadline_seconds=50,
                   inequality_property=None,
                   is_improvement=None,
                   key=None,
                   keys_only=False,
                   limit=100,
                   master_name=None,
                   max_end_revision=None,
                   max_start_revision=None,
                   max_timestamp=None,
                   min_end_revision=None,
                   min_start_revision=None,
                   min_timestamp=None,
                   recovered=None,
                   sheriff=None,
                   start_cursor=None,
                   test=None,
                   test_suite_name=None):
        """Queries alerts matching the filters, chasing cursors when needed.

        Generator-style tasklet: the result is delivered by raising
        `ndb.Return`.

        Args:
          bug_id: '' selects alerts without a bug; any other non-None value
            is converted with int() and matched exactly.
          count_limit: when non-zero, a count query with this limit runs
            alongside the page fetch.
          deadline_seconds: time budget for chasing cursors.
          key: urlsafe key; when given, only that entity is fetched and every
            other argument is ignored.
          keys_only: forced to False when post filters must be applied.
          limit: page size for each fetch.
          sheriff: id of a 'Sheriff' entity; the filter is applied only when
            that entity exists.
          start_cursor: cursor at which the first page starts.
          test: test path; matched against both its old-style key and its
            TestMetadata key.
          The remaining arguments are optional equality or range filters.

        Returns (via ndb.Return):
          A (results, start_cursor, count) tuple. start_cursor is None when
          there are no more pages. count is the count query result when
          count_limit is set, otherwise the size of the last fetched page
          before post filters; it is 0 if no page was fetched at all.
        """
        if key:
            # This tasklet isn't allowed to catch the internal_only AssertionError.
            alert = yield ndb.Key(urlsafe=key).get_async()
            raise ndb.Return(([alert], None, 1))

        # The sheriff lookup does not depend on the cursor, so resolve it once
        # instead of on every pass of the loop below.
        sheriff_key = None
        if sheriff is not None:
            candidate_key = ndb.Key('Sheriff', sheriff)
            sheriff_entity = yield candidate_key.get_async()
            if sheriff_entity:
                sheriff_key = candidate_key

        # post_filters can cause results to be empty, depending on the shape of the
        # data and which filters are applied in the query and which filters are
        # applied after the query. Automatically chase cursors until some results
        # are found, but stay under the request timeout.
        results = []
        # Defined up front so the final ndb.Return cannot hit an unbound name
        # when the deadline has already passed before the first iteration.
        count = 0
        deadline = time.time() + deadline_seconds
        while not results and time.time() < deadline:
            query = cls.query()
            if sheriff_key is not None:
                query = query.filter(cls.sheriff == sheriff_key)
            if is_improvement is not None:
                query = query.filter(cls.is_improvement == is_improvement)
            if bug_id is not None:
                # Use a separate local: overwriting `bug_id` with None here
                # would make the next pass skip this filter entirely and run
                # a different query against the previous query's cursor.
                bug_id_filter = None if bug_id == '' else int(bug_id)
                query = query.filter(cls.bug_id == bug_id_filter)
            if recovered is not None:
                query = query.filter(cls.recovered == recovered)
            if test:
                query = query.filter(
                    cls.test.IN([
                        utils.OldStyleTestKey(test),
                        utils.TestMetadataKey(test)
                    ]))
                query = query.order(cls.key)
            if master_name:
                query = query.filter(cls.master_name == master_name)
            if bot_name:
                query = query.filter(cls.bot_name == bot_name)
            if test_suite_name:
                query = query.filter(cls.benchmark_name == test_suite_name)

            query, post_filters = cls._InequalityFilters(
                query, inequality_property, min_end_revision, max_end_revision,
                min_start_revision, max_start_revision, min_timestamp,
                max_timestamp)
            if post_filters:
                # Post filters inspect entity properties, so keys alone are
                # not enough.
                keys_only = False
            query = query.order(-cls.timestamp)

            futures = [
                query.fetch_page_async(limit,
                                       start_cursor=start_cursor,
                                       keys_only=keys_only)
            ]
            if count_limit:
                futures.append(query.count_async(count_limit))
            query_duration = timing.WallTimeLogger('query_duration')
            with query_duration:
                yield futures
            results, start_cursor, more = futures[0].get_result()
            if count_limit:
                count = futures[1].get_result()
            else:
                count = len(results)
            logging.info('query_results_count=%d', len(results))
            if results:
                logging.info('duration_per_result=%f',
                             query_duration.seconds / len(results))
            if post_filters:
                results = [
                    alert for alert in results if all(
                        post_filter(alert) for post_filter in post_filters)
                ]
            if not more:
                start_cursor = None
            if not start_cursor:
                break
        raise ndb.Return((results, start_cursor, count))
Exemple #17
0
def Evaluate(job, event, evaluator):
    """Applies an evaluator given a task in the task graph and an event as input.

    This function implements a depth-first search traversal of the task graph
    and applies the `evaluator` given a task and the event input in post-order
    traversal. We start the DFS from the terminal tasks (those that no other
    task depends on) and call the `evaluator` function with a representation
    of the task in the graph, an `event` as input, and an accumulator
    argument.

    The `evaluator` must be a callable which accepts three arguments:

      - task: an InMemoryTask instance, representing a task in the graph.
      - event: an object whose shape/type is defined by the caller of the
        `Evaluate` function and that the evaluator can handle.
      - accumulator: a dictionary which is mutable which is valid in the scope
        of a traversal of the graph.

    The `evaluator` must return either None or an iterable of callables which
    take a single argument, which is the accumulator at the end of a
    traversal.

    Events are free-form but usually are dictionaries which constitute inputs
    that are external to the task graph evaluation. This could model events in
    an event-driven evaluation of tasks, or synthetic inputs to the system. It
    is more important that the `event` information is known to the evaluator
    implementation, and is provided as-is to the evaluator in this function.

    The Evaluate function will keep iterating while there are actions still
    being produced by the evaluator. When there are no more actions to run,
    the Evaluate function will return the most recent traversal's accumulator.

    Returns:
      The accumulator dict of the last traversal, or None if the job's task
      graph is found to be empty.

    Raises:
      ValueError: if `job` is None.
    """
    if job is None:
        raise ValueError('job must not be None.')

    accumulator = {}
    actions = [NoopAction()]
    while actions:
        for action in actions:
            logging.debug('Running action: %s', action)
            # Each action should be a callable which takes the accumulator as an
            # input. We want to run each action in their own transaction as well.
            # This must not be called in a transaction.
            with timing.WallTimeLogger('ExecutionEngine:ActionRunner<%s>' %
                                       (type(action).__name__, )):
                action(accumulator)

        # Clear the actions and accumulator for this traversal.
        del actions[:]
        accumulator.clear()

        # Load the graph transactionally.
        graph = _LoadTaskGraph(job)

        if not graph.tasks:
            logging.debug('Task graph empty for job %s', job.job_id)
            return

        # First get all the "terminal" tasks, and traverse the dependencies in a
        # depth-first-search.
        task_stack = [graph.tasks[task] for task in graph.terminal_tasks]

        # If the stack is empty, we should start at an arbitrary point.
        if not task_stack:
            # next(iter(...)) picks the same first value as `.values()[0]` on
            # Python 2 and also works on Python 3, where dict views cannot be
            # indexed.
            task_stack = [next(iter(graph.tasks.values()))]
        vertex_states = {}
        while task_stack:
            task = task_stack[-1]
            state = vertex_states.get(task.key, NOT_EVALUATED)
            if state == CHILDREN_PENDING:
                # All dependencies pushed above this task have been handled,
                # so the task itself can be evaluated (post-order).
                in_memory_task = task.ToInMemoryTask()
                result_actions = evaluator(in_memory_task, event, accumulator)
                if result_actions:
                    actions.extend(result_actions)
                vertex_states[task.key] = EVALUATION_DONE
            elif state == NOT_EVALUATED:
                # This vertex is coloured white, we should traverse the dependencies.
                vertex_states[task.key] = CHILDREN_PENDING
                for dependency in task.dependencies:
                    if vertex_states.get(dependency,
                                         NOT_EVALUATED) == NOT_EVALUATED:
                        task_stack.append(graph.tasks[dependency])
            else:
                assert state == EVALUATION_DONE
                task_stack.pop()

    return accumulator
def ProcessHistogramSet(histogram_dicts):
    """Validates an uploaded HistogramSet and queues tasks to process it.

    The dicts are imported into a HistogramSet, which is then cleaned up
    (related histograms resolved, diagnostics deduplicated, dense shared
    diagnostics inlined, bin data purged). The master, bot and benchmark names
    are read from the first histogram's diagnostics and validated, the
    suite-level sparse diagnostics are handed to DeduplicateAndPut, and finally
    the histograms are batched into tasks which are queued.

    Args:
        histogram_dicts: The decoded HistogramSet JSON; must be a list of
            dicts.

    Raises:
        api_request_handler.BadRequestError: If histogram_dicts is not a list,
            or if the imported set contains no histograms. The helpers called
            here may raise additional errors of their own.
    """
    if not isinstance(histogram_dicts, list):
        raise api_request_handler.BadRequestError(
            'HistogramSet JSON must be a list of dicts')

    # Kick off the whitelist fetch early so it can overlap with the import and
    # clean-up work below; the result is only needed once the bot is known.
    bot_whitelist_future = stored_object.GetAsync(
        add_point_queue.BOT_WHITELIST_KEY)

    histograms = histogram_set.HistogramSet()

    with timing.WallTimeLogger('hs.ImportDicts'):
        histograms.ImportDicts(histogram_dicts)

    with timing.WallTimeLogger('hs.ResolveRelatedHistograms'):
        histograms.ResolveRelatedHistograms()

    with timing.WallTimeLogger('hs.DeduplicateDiagnostics'):
        histograms.DeduplicateDiagnostics()

    if len(histograms) == 0:
        raise api_request_handler.BadRequestError(
            'HistogramSet JSON must contain at least one histogram.')

    with timing.WallTimeLogger('hs._LogDebugInfo'):
        _LogDebugInfo(histograms)

    with timing.WallTimeLogger('InlineDenseSharedDiagnostics'):
        InlineDenseSharedDiagnostics(histograms)

    # TODO(eakuefner): Get rid of this.
    # https://github.com/catapult-project/catapult/issues/4242
    with timing.WallTimeLogger('_PurgeHistogramBinData'):
        _PurgeHistogramBinData(histograms)

    # The suite identity (master/bot/benchmark) is taken from the first
    # histogram in the set.
    with timing.WallTimeLogger('_GetDiagnosticValue calls'):
        master = _GetDiagnosticValue(reserved_infos.MASTERS.name,
                                     histograms.GetFirstHistogram())
        bot = _GetDiagnosticValue(reserved_infos.BOTS.name,
                                  histograms.GetFirstHistogram())
        benchmark = _GetDiagnosticValue(reserved_infos.BENCHMARKS.name,
                                        histograms.GetFirstHistogram())
        benchmark_description = _GetDiagnosticValue(
            reserved_infos.BENCHMARK_DESCRIPTIONS.name,
            histograms.GetFirstHistogram(),
            optional=True)

    with timing.WallTimeLogger('_ValidateMasterBotBenchmarkName'):
        _ValidateMasterBotBenchmarkName(master, bot, benchmark)

    with timing.WallTimeLogger('ComputeRevision'):
        suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))

        logging.info('Suite: %s', suite_key.id())

        revision = ComputeRevision(histograms)

        # Resolve the async whitelist fetch started at the top of the function.
        bot_whitelist = bot_whitelist_future.get_result()
        internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)

    # We'll skip the histogram-level sparse diagnostics because we need to
    # handle those with the histograms, below, so that we can properly assign
    # test paths.
    with timing.WallTimeLogger('FindSuiteLevelSparseDiagnostics'):
        suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
            histograms, suite_key, revision, internal_only)

    # TODO(eakuefner): Refactor master/bot computation to happen above this line
    # so that we can replace with a DiagnosticRef rather than a full diagnostic.
    with timing.WallTimeLogger('DeduplicateAndPut'):
        new_guids_to_old_diagnostics = DeduplicateAndPut(
            suite_level_sparse_diagnostic_entities, suite_key, revision)

    with timing.WallTimeLogger('ReplaceSharedDiagnostic calls'):
        # items() rather than iteritems() so this runs on Python 2 and 3 alike.
        for new_guid, old_diagnostic in new_guids_to_old_diagnostics.items():
            histograms.ReplaceSharedDiagnostic(
                new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

    with timing.WallTimeLogger('_BatchHistogramsIntoTasks'):
        tasks = _BatchHistogramsIntoTasks(suite_key.id(), histograms, revision,
                                          benchmark_description)

    with timing.WallTimeLogger('_QueueHistogramTasks'):
        _QueueHistogramTasks(tasks)