def _FetchRowsForTest(self, test_key):
  test_desc = yield descriptor.Descriptor.FromTestPathAsync(
      utils.TestPath(test_key))
  projection, limit = self._RowQueryProjection(test_desc.statistic)
  query = graph_data.Row.query(projection=projection)
  query = query.filter(graph_data.Row.parent_test == test_key)
  query = self._FilterRowQuery(query)

  with timing.WallTimeLogger('fetch_test'):
    rows = yield query.fetch_async(limit)

  with timing.CpuTimeLogger('rows'):
    for row in rows:
      # Sometimes the dev environment just ignores some filters.
      if self._min_revision and row.revision < self._min_revision:
        continue
      if self._min_timestamp and row.timestamp < self._min_timestamp:
        continue
      if self._max_revision and row.revision > self._max_revision:
        continue
      if self._max_timestamp and row.timestamp > self._max_timestamp:
        continue
      datum = self._Datum(row.revision)
      if test_desc.statistic is None:
        datum['avg'] = self.Round(row.value)
        if hasattr(row, 'error') and row.error:
          datum['std'] = self.Round(row.error)
      else:
        datum[test_desc.statistic] = self.Round(row.value)
      for stat in self._statistic_columns:
        if hasattr(row, 'd_' + stat):
          datum[stat] = self.Round(getattr(row, 'd_' + stat))
      if 'timestamp' in self._columns:
        datum['timestamp'] = row.timestamp.isoformat()
      if 'revisions' in self._columns:
        datum['revisions'] = {
            attr: value
            for attr, value in row.to_dict().items()
            if attr.startswith('r_')
        }
      if 'annotations' in self._columns:
        datum['annotations'] = {
            attr: value
            for attr, value in row.to_dict().items()
            if attr.startswith('a_')
        }

  if 'histogram' in self._columns and test_desc.statistic is None:
    with timing.WallTimeLogger('fetch_histograms'):
      yield [self._FetchHistogram(test_key, row.revision) for row in rows]
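# Every snippet in this section wraps expensive work in
# timing.WallTimeLogger / timing.CpuTimeLogger. The `timing` module is part
# of the dashboard codebase; the classes below are a minimal sketch of the
# interface these call sites assume (a label, a context manager, and a
# `.seconds` attribute readable after exit, as QueryAsync does below), not
# the real implementation. The loggers are used either inline
# (`with timing.WallTimeLogger('fetch_test'): ...`) or held in a variable so
# `.seconds` can be read after the block exits.
import logging
import time


class WallTimeLogger(object):
  """Logs elapsed wall-clock time for the enclosed block under a label."""

  def __init__(self, label):
    self._label = label
    self.seconds = None

  def _Now(self):
    return time.time()

  def __enter__(self):
    self._start = self._Now()
    return self

  def __exit__(self, exc_type, exc_value, traceback):
    self.seconds = self._Now() - self._start
    logging.info('%s_time=%f', self._label, self.seconds)


class CpuTimeLogger(WallTimeLogger):
  """Same contract, but measures process CPU time instead of wall time."""

  def _Now(self):
    # time.clock() on Python 2; time.process_time() on Python 3.
    return time.clock()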
def _LoadHistogramList(input_file):
  """Incrementally decodes and JSON-parses a file of new histograms.

  This helper takes an input file that yields fragments of JSON-encoded
  histograms, incrementally builds the list of histograms, and returns the
  fully formed list at the end.

  Returns:
    A list() of dict()s decoded from the input_file.

  Raises:
    ValueError: If no valid JSON fragments are found in the file.
  """
  try:
    with timing.WallTimeLogger('json.load'):

      def NormalizeDecimals(obj):
        # Traverse every object in obj to turn Decimal objects into floats.
        if isinstance(obj, decimal.Decimal):
          return float(obj)
        if isinstance(obj, dict):
          for k, v in obj.iteritems():
            obj[k] = NormalizeDecimals(v)
        if isinstance(obj, list):
          obj = [NormalizeDecimals(x) for x in obj]
        return obj

      objects = [NormalizeDecimals(x) for x in ijson.items(input_file, 'item')]
  except ijson.JSONError as e:
    # Wrap the ijson exception in a ValueError.
    raise ValueError('Failed to parse JSON: %s' % (e))

  return objects
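# Usage sketch for _LoadHistogramList. ijson.items(f, 'item') streams each
# element of a top-level JSON array without loading the whole payload into
# memory, and it yields JSON numbers as decimal.Decimal, which is why the
# helper normalizes them to floats. The payload below is illustrative only.
import StringIO  # Python 2; io.StringIO on Python 3.

payload = StringIO.StringIO(
    '[{"name": "timeToFirstPaint", "sampleValues": [1.25]}]')
histogram_dicts = _LoadHistogramList(payload)
assert isinstance(histogram_dicts[0]['sampleValues'][0], float)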
def _FetchHistogramsForTest(self, test):
  query = graph_data.Row.query(graph_data.Row.parent_test == test)
  query = self._FilterRowQuery(query)

  with timing.WallTimeLogger('fetch_histograms'):
    row_keys = yield query.fetch_async(HISTOGRAMS_QUERY_LIMIT, keys_only=True)
    yield [
        self._FetchHistogram(test, row_key.integer_id())
        for row_key in row_keys
    ]
def AuthorizedPost(self):
  datastore_hooks.SetPrivilegedRequest()

  with timing.WallTimeLogger('decompress'):
    try:
      data_str = self.request.body
      # Decompressing only validates that the body is zlib data; the
      # compressed bytes themselves are what get written to cloud storage.
      zlib.decompress(data_str)
      logging.info('Received compressed data.')
    except zlib.error:
      data_str = self.request.get('data')
      data_str = zlib.compress(data_str)
      logging.info('Received uncompressed data.')

  if not data_str:
    raise api_request_handler.BadRequestError('Missing "data" parameter')

  filename = uuid.uuid4()
  params = {'gcs_file_path': '/add-histograms-cache/%s' % filename}

  gcs_file = cloudstorage.open(
      params['gcs_file_path'],
      'w',
      content_type='application/octet-stream',
      retry_params=_RETRY_PARAMS)
  gcs_file.write(data_str)
  gcs_file.close()

  retry_options = taskqueue.TaskRetryOptions(
      task_retry_limit=_TASK_RETRY_LIMIT)

  queue = taskqueue.Queue('default')
  queue.add(
      taskqueue.Task(
          url='/add_histograms/process',
          payload=json.dumps(params),
          retry_options=retry_options))
def GetReport(template_id, revisions):
  with timing.WallTimeLogger('GetReport'), timing.CpuTimeLogger('GetReport'):
    try:
      template = ndb.Key('ReportTemplate', template_id).get()
    except AssertionError:
      # InternalOnlyModel._post_get_hook asserts that the user can access the
      # entity.
      return None

    result = {'editable': False}
    if template:
      result['owners'] = template.owners
      result['editable'] = utils.GetEmail() in template.owners
      result['report'] = report_query.ReportQuery(
          template.template, revisions).FetchSync()
    else:
      for handler in ListStaticTemplates():
        if handler.template.key.id() != template_id:
          continue
        template = handler.template
        report = handler(revisions)
        if isinstance(report, report_query.ReportQuery):
          report = report.FetchSync()
        result['report'] = report
        break
      if template is None:
        return None

    result['id'] = template.key.id()
    result['name'] = template.name
    result['internal'] = template.internal_only
  return result
def _FetchTests(self):
  with timing.WallTimeLogger('fetch_tests'):
    tests = yield [key.get_async() for key in self._test_keys]
  tests = [test for test in tests if test]
  if not tests:
    raise api_request_handler.NotFoundError

  improvement_direction = None
  for test in tests:
    if test.internal_only:
      self._private = True

    test_desc = yield descriptor.Descriptor.FromTestPathAsync(
        utils.TestPath(test.key))
    # The unit for 'count' statistics is trivially always 'count'. Callers
    # certainly want the units of the measurement, which is the same as the
    # units of the 'avg' and 'std' statistics.
    if self._units is None or test_desc.statistic != 'count':
      self._units = test.units
      improvement_direction = test.improvement_direction

  if improvement_direction == anomaly.DOWN:
    self._improvement_direction = 'down'
  elif improvement_direction == anomaly.UP:
    self._improvement_direction = 'up'
  else:
    self._improvement_direction = None
def Post(self):
  if utils.IsDevAppserver():
    # Don't require developers to zip the body.
    # In prod, the data will be written to cloud storage and processed on the
    # taskqueue, so the caller will not see any errors. In dev_appserver,
    # process the data immediately so the caller will see errors. Also,
    # always create an upload completion token for such requests.
    token, token_info = self._CreateUploadCompletionToken()
    ProcessHistogramSet(
        _LoadHistogramList(StringIO.StringIO(self.request.body)), token)
    token.UpdateStateAsync(
        upload_completion_token.State.COMPLETED).wait()
    return token_info

  with timing.WallTimeLogger('decompress'):
    try:
      data_str = self.request.body
      # Try to decompress at most 100 bytes from the data, only to determine
      # if we've been given a compressed payload.
      zlib.decompressobj().decompress(data_str, 100)
      logging.info('Received compressed data.')
    except zlib.error:
      data_str = self.request.get('data')
      if not data_str:
        raise api_request_handler.BadRequestError(
            'Missing or uncompressed data.')
      data_str = zlib.compress(data_str)
      logging.info('Received uncompressed data.')

  if not data_str:
    raise api_request_handler.BadRequestError('Missing "data" parameter')

  filename = uuid.uuid4()
  params = {'gcs_file_path': '/add-histograms-cache/%s' % filename}

  gcs_file = cloudstorage.open(
      params['gcs_file_path'],
      'w',
      content_type='application/octet-stream',
      retry_params=_RETRY_PARAMS)
  gcs_file.write(data_str)
  gcs_file.close()

  token_info = None
  if utils.ShouldTurnOnUploadCompletionTokenExperiment():
    _, token_info = self._CreateUploadCompletionToken(params['gcs_file_path'])
    params['upload_completion_token'] = token_info['token']

  retry_options = taskqueue.TaskRetryOptions(
      task_retry_limit=_TASK_RETRY_LIMIT)

  queue = taskqueue.Queue('default')
  queue.add(
      taskqueue.Task(
          url='/add_histograms/process',
          payload=json.dumps(params),
          retry_options=retry_options))
  return token_info
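# Why `zlib.decompressobj().decompress(data, 100)` works as a compression
# sniff: the decompressor raises zlib.error on the very first bytes if the
# input lacks a valid zlib header, while the max_length argument caps how
# much output is produced, so probing a large valid payload stays cheap.
# A self-contained demonstration:
import zlib

compressed = zlib.compress(b'{"histograms": []}')
zlib.decompressobj().decompress(compressed, 100)  # Succeeds: zlib stream.
try:
  zlib.decompressobj().decompress(b'{"histograms": []}', 100)
except zlib.error:
  print('plain JSON raises zlib.error, so treat it as an uncompressed upload')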
def AuthorizedPost(self):
  datastore_hooks.SetPrivilegedRequest()

  with timing.WallTimeLogger('decompress'):
    try:
      data_str = zlib.decompress(self.request.body)
      logging.info('Received compressed data.')
    except zlib.error:
      data_str = self.request.get('data')
      logging.info('Received uncompressed data.')

  if not data_str:
    raise api_request_handler.BadRequestError('Missing "data" parameter')

  logging.info('Received data: %s', data_str[:200])

  with timing.WallTimeLogger('json.loads'):
    histogram_dicts = json.loads(data_str)

  ProcessHistogramSet(histogram_dicts)
def List():
  with timing.WallTimeLogger('List'), timing.CpuTimeLogger('List'):
    templates = ReportTemplate.query().fetch()
    templates += [handler.template for handler in ListStaticTemplates()]
    templates = [{
        'id': template.key.id(),
        'name': template.name,
        'modified': template.modified.isoformat(),
    } for template in templates]
  return sorted(templates, key=lambda d: d['name'])
def _LoadTaskGraph(job):
  with timing.WallTimeLogger('ExecutionEngine:_LoadTaskGraph'):
    tasks = Task.query(ancestor=job.key).fetch()
    # The way we get the terminal tasks is by looking at tasks where nothing
    # depends on them.
    has_dependents = set()
    for task in tasks:
      has_dependents |= set(task.dependencies)
    terminal_tasks = [t.key for t in tasks if t.key not in has_dependents]
    return ReconstitutedTaskGraph(
        terminal_tasks=terminal_tasks,
        tasks={task.key: task for task in tasks})
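# A tiny in-memory illustration of the terminal-task computation above,
# using plain named tuples instead of datastore entities. Graph: A depends
# on B, B depends on C. Only A is terminal, because no task lists A among
# its dependencies.
from collections import namedtuple

FakeTask = namedtuple('FakeTask', ['key', 'dependencies'])

fake_tasks = [
    FakeTask(key='A', dependencies=['B']),
    FakeTask(key='B', dependencies=['C']),
    FakeTask(key='C', dependencies=[]),
]
has_dependents = set()
for fake_task in fake_tasks:
  has_dependents |= set(fake_task.dependencies)  # {'B', 'C'}
assert [t.key for t in fake_tasks if t.key not in has_dependents] == ['A']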
def Post(self):
  if utils.IsDevAppserver():
    # Don't require developers to zip the body.
    # In prod, the data will be written to cloud storage and processed on the
    # taskqueue, so the caller will not see any errors. In dev_appserver,
    # process the data immediately so the caller will see errors.
    ProcessHistogramSet(json.loads(self.request.body))
    return

  with timing.WallTimeLogger('decompress'):
    try:
      data_str = self.request.body
      # Try to decompress at most 100 bytes from the data, only to determine
      # if we've been given a compressed payload.
      zlib.decompressobj().decompress(data_str, 100)
      logging.info('Received compressed data.')
    except zlib.error:
      data_str = self.request.get('data')
      if not data_str:
        raise api_request_handler.BadRequestError(
            'Missing or uncompressed data.')
      data_str = zlib.compress(data_str)
      logging.info('Received uncompressed data.')

  if not data_str:
    raise api_request_handler.BadRequestError('Missing "data" parameter')

  filename = uuid.uuid4()
  params = {'gcs_file_path': '/add-histograms-cache/%s' % filename}

  gcs_file = cloudstorage.open(
      params['gcs_file_path'],
      'w',
      content_type='application/octet-stream',
      retry_params=_RETRY_PARAMS)
  gcs_file.write(data_str)
  gcs_file.close()

  retry_options = taskqueue.TaskRetryOptions(
      task_retry_limit=_TASK_RETRY_LIMIT)

  queue = taskqueue.Queue('default')
  queue.add(
      taskqueue.Task(
          url='/add_histograms/process',
          payload=json.dumps(params),
          retry_options=retry_options))
def post(self):
  datastore_hooks.SetPrivilegedRequest()

  try:
    params = json.loads(self.request.body)
    gcs_file_path = params['gcs_file_path']

    try:
      gcs_file = cloudstorage.open(
          gcs_file_path, 'r', retry_params=_RETRY_PARAMS)
      contents = gcs_file.read()
      data_str = zlib.decompress(contents)
      gcs_file.close()
    finally:
      cloudstorage.delete(gcs_file_path, retry_params=_RETRY_PARAMS)

    with timing.WallTimeLogger('json.loads'):
      histogram_dicts = json.loads(data_str)

    ProcessHistogramSet(histogram_dicts)
  except Exception as e:  # pylint: disable=broad-except
    logging.error('Error processing histograms: %r', e.message)
    self.response.out.write(json.dumps({'error': e.message}))
def ProcessHistogramSet(histogram_dicts, completion_token=None):
  if not isinstance(histogram_dicts, list):
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must be a list of dicts')

  histograms = histogram_set.HistogramSet()

  with timing.WallTimeLogger('hs.ImportDicts'):
    histograms.ImportDicts(histogram_dicts)

  with timing.WallTimeLogger('hs.DeduplicateDiagnostics'):
    histograms.DeduplicateDiagnostics()

  if len(histograms) == 0:
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must contain at least one histogram.')

  with timing.WallTimeLogger('hs._LogDebugInfo'):
    _LogDebugInfo(histograms)

  with timing.WallTimeLogger('InlineDenseSharedDiagnostics'):
    InlineDenseSharedDiagnostics(histograms)

  # TODO(#4242): Get rid of this.
  # https://github.com/catapult-project/catapult/issues/4242
  with timing.WallTimeLogger('_PurgeHistogramBinData'):
    _PurgeHistogramBinData(histograms)

  with timing.WallTimeLogger('_GetDiagnosticValue calls'):
    master = _GetDiagnosticValue(reserved_infos.MASTERS.name,
                                 histograms.GetFirstHistogram())
    bot = _GetDiagnosticValue(reserved_infos.BOTS.name,
                              histograms.GetFirstHistogram())
    benchmark = _GetDiagnosticValue(reserved_infos.BENCHMARKS.name,
                                    histograms.GetFirstHistogram())
    benchmark_description = _GetDiagnosticValue(
        reserved_infos.BENCHMARK_DESCRIPTIONS.name,
        histograms.GetFirstHistogram(),
        optional=True)

  with timing.WallTimeLogger('_ValidateMasterBotBenchmarkName'):
    _ValidateMasterBotBenchmarkName(master, bot, benchmark)

  with timing.WallTimeLogger('ComputeRevision'):
    suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))
    logging.info('Suite: %s', suite_key.id())

    revision = ComputeRevision(histograms)
    logging.info('Revision: %s', revision)

    internal_only = graph_data.Bot.GetInternalOnlySync(master, bot)

  revision_record = histogram.HistogramRevisionRecord.GetOrCreate(
      suite_key, revision)
  revision_record.put()

  last_added = histogram.HistogramRevisionRecord.GetLatest(
      suite_key).get_result()

  # On first upload, a query immediately following a put may return nothing.
  if not last_added:
    last_added = revision_record

  _CheckRequest(last_added, 'No last revision')

  # We'll skip the histogram-level sparse diagnostics because we need to
  # handle those with the histograms, below, so that we can properly assign
  # test paths.
  with timing.WallTimeLogger('FindSuiteLevelSparseDiagnostics'):
    suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
        histograms, suite_key, revision, internal_only)

  # TODO(896856): Refactor master/bot computation to happen above this line
  # so that we can replace with a DiagnosticRef rather than a full diagnostic.
  with timing.WallTimeLogger('DeduplicateAndPut'):
    new_guids_to_old_diagnostics = (
        histogram.SparseDiagnostic.FindOrInsertDiagnostics(
            suite_level_sparse_diagnostic_entities, suite_key, revision,
            last_added.revision).get_result())

  with timing.WallTimeLogger('ReplaceSharedDiagnostic calls'):
    for new_guid, old_diagnostic in new_guids_to_old_diagnostics.items():
      histograms.ReplaceSharedDiagnostic(
          new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

  with timing.WallTimeLogger('_CreateHistogramTasks'):
    tasks = _CreateHistogramTasks(suite_key.id(), histograms, revision,
                                  benchmark_description, completion_token)

  with timing.WallTimeLogger('_QueueHistogramTasks'):
    _QueueHistogramTasks(tasks)
def QueryAsync(cls,
               bot_name=None,
               bug_id=None,
               count_limit=0,
               deadline_seconds=50,
               inequality_property=None,
               is_improvement=None,
               key=None,
               keys_only=False,
               limit=100,
               master_name=None,
               max_end_revision=None,
               max_start_revision=None,
               max_timestamp=None,
               min_end_revision=None,
               min_start_revision=None,
               min_timestamp=None,
               recovered=None,
               subscriptions=None,
               start_cursor=None,
               test=None,
               test_keys=None,
               test_suite_name=None,
               project_id=None):
  if key:
    # This tasklet isn't allowed to catch the internal_only AssertionError.
    alert = yield ndb.Key(urlsafe=key).get_async()
    raise ndb.Return(([alert], None, 1))

  # post_filters can cause results to be empty, depending on the shape of the
  # data and which filters are applied in the query and which filters are
  # applied after the query. Automatically chase cursors until some results
  # are found, but stay under the request timeout.
  results = []
  deadline = time.time() + deadline_seconds
  while not results and time.time() < deadline:
    query = cls.query()
    equality_properties = []
    if subscriptions:
      # Empty subscriptions is not allowed in query
      query = query.filter(cls.subscription_names.IN(subscriptions))
      equality_properties.append('subscription_names')
      inequality_property = 'key'
    if is_improvement is not None:
      query = query.filter(cls.is_improvement == is_improvement)
      equality_properties.append('is_improvement')
      inequality_property = 'key'
    if bug_id is not None:
      if bug_id == '':
        query = query.filter(cls.bug_id == None)
        equality_properties.append('bug_id')
        inequality_property = 'key'
      elif bug_id != '*':
        query = query.filter(cls.bug_id == int(bug_id))
        equality_properties.append('bug_id')
        inequality_property = 'key'
      # bug_id='*' translates to bug_id != None, which is handled with the
      # other inequality filters.
    if project_id is not None:
      query = query.filter(cls.project_id == project_id)
      equality_properties.append('project_id')
      inequality_property = 'key'
    if recovered is not None:
      query = query.filter(cls.recovered == recovered)
      equality_properties.append('recovered')
      inequality_property = 'key'
    if test or test_keys:
      if not test_keys:
        test_keys = []
      if test:
        test_keys += [utils.OldStyleTestKey(test), utils.TestMetadataKey(test)]
      query = query.filter(cls.test.IN(test_keys))
      query = query.order(cls.key)
      equality_properties.append('test')
      inequality_property = 'key'
    if master_name:
      query = query.filter(cls.master_name == master_name)
      equality_properties.append('master_name')
      inequality_property = 'key'
    if bot_name:
      query = query.filter(cls.bot_name == bot_name)
      equality_properties.append('bot_name')
      inequality_property = 'key'
    if test_suite_name:
      query = query.filter(cls.benchmark_name == test_suite_name)
      equality_properties.append('benchmark_name')
      inequality_property = 'key'

    query, post_filters = cls._InequalityFilters(
        query, equality_properties, inequality_property, bug_id,
        min_end_revision, max_end_revision, min_start_revision,
        max_start_revision, min_timestamp, max_timestamp)
    if post_filters:
      keys_only = False
    query = query.order(-cls.timestamp, cls.key)

    futures = [
        query.fetch_page_async(
            limit, start_cursor=start_cursor, keys_only=keys_only)
    ]
    if count_limit:
      futures.append(query.count_async(count_limit))
    query_duration = timing.WallTimeLogger('query_duration')
    with query_duration:
      yield futures
    results, start_cursor, more = futures[0].get_result()
    if count_limit:
      count = futures[1].get_result()
    else:
      count = len(results)
    logging.info('query_results_count=%d', len(results))
    if results:
      logging.info('duration_per_result=%f',
                   query_duration.seconds / len(results))
    if post_filters:
      results = [
          alert for alert in results
          if all(post_filter(alert) for post_filter in post_filters)
      ]
    if not more:
      start_cursor = None
    if not start_cursor:
      break

  raise ndb.Return((results, start_cursor, count))
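# Background for the `inequality_property` bookkeeping above: Datastore
# permits inequality filters on at most one property per query, so when the
# inequality slot is spent (here, on the key once equality filters are
# present), remaining revision/timestamp bounds appear to be applied in
# Python as post_filters. A minimal sketch with a hypothetical ndb model:
from google.appengine.ext import ndb


class FakeAlert(ndb.Model):
  bot_name = ndb.StringProperty()
  timestamp = ndb.DateTimeProperty()
  end_revision = ndb.IntegerProperty()


def SketchQuery(cutoff, max_revision):
  query = FakeAlert.query(FakeAlert.bot_name == 'linux-perf')  # Equality: ok.
  query = query.filter(FakeAlert.timestamp > cutoff)  # One inequality: ok.
  # Adding `FakeAlert.end_revision < max_revision` as a second inequality
  # would raise BadRequestError at fetch time; apply it after fetching:
  post_filters = [lambda alert: alert.end_revision < max_revision]
  return [
      alert for alert in query.fetch(100)
      if all(post_filter(alert) for post_filter in post_filters)
  ]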
def _FetchDiagnostics(self):
  with timing.WallTimeLogger('fetch_diagnostics'):
    yield [
        self._FetchDiagnosticsForTest(test)
        for test in self._unsuffixed_test_metadata_keys
    ]
def QueryAsync(cls,
               bot_name=None,
               bug_id=None,
               count_limit=0,
               deadline_seconds=50,
               inequality_property=None,
               is_improvement=None,
               key=None,
               keys_only=False,
               limit=100,
               master_name=None,
               max_end_revision=None,
               max_start_revision=None,
               max_timestamp=None,
               min_end_revision=None,
               min_start_revision=None,
               min_timestamp=None,
               recovered=None,
               sheriff=None,
               start_cursor=None,
               test=None,
               test_suite_name=None):
  if key:
    # This tasklet isn't allowed to catch the internal_only AssertionError.
    alert = yield ndb.Key(urlsafe=key).get_async()
    raise ndb.Return(([alert], None, 1))

  # post_filters can cause results to be empty, depending on the shape of the
  # data and which filters are applied in the query and which filters are
  # applied after the query. Automatically chase cursors until some results
  # are found, but stay under the request timeout.
  results = []
  deadline = time.time() + deadline_seconds
  while not results and time.time() < deadline:
    query = cls.query()
    if sheriff is not None:
      sheriff_key = ndb.Key('Sheriff', sheriff)
      sheriff_entity = yield sheriff_key.get_async()
      if sheriff_entity:
        query = query.filter(cls.sheriff == sheriff_key)
    if is_improvement is not None:
      query = query.filter(cls.is_improvement == is_improvement)
    if bug_id is not None:
      if bug_id == '':
        bug_id = None
      else:
        bug_id = int(bug_id)
      query = query.filter(cls.bug_id == bug_id)
    if recovered is not None:
      query = query.filter(cls.recovered == recovered)
    if test:
      query = query.filter(
          cls.test.IN([utils.OldStyleTestKey(test),
                       utils.TestMetadataKey(test)]))
      query = query.order(cls.key)
    if master_name:
      query = query.filter(cls.master_name == master_name)
    if bot_name:
      query = query.filter(cls.bot_name == bot_name)
    if test_suite_name:
      query = query.filter(cls.benchmark_name == test_suite_name)

    query, post_filters = cls._InequalityFilters(
        query, inequality_property, min_end_revision, max_end_revision,
        min_start_revision, max_start_revision, min_timestamp, max_timestamp)
    if post_filters:
      keys_only = False
    query = query.order(-cls.timestamp)

    futures = [
        query.fetch_page_async(
            limit, start_cursor=start_cursor, keys_only=keys_only)
    ]
    if count_limit:
      futures.append(query.count_async(count_limit))
    query_duration = timing.WallTimeLogger('query_duration')
    with query_duration:
      yield futures
    results, start_cursor, more = futures[0].get_result()
    if count_limit:
      count = futures[1].get_result()
    else:
      count = len(results)
    logging.info('query_results_count=%d', len(results))
    if results:
      logging.info('duration_per_result=%f',
                   query_duration.seconds / len(results))
    if post_filters:
      results = [
          alert for alert in results
          if all(post_filter(alert) for post_filter in post_filters)
      ]
    if not more:
      start_cursor = None
    if not start_cursor:
      break

  raise ndb.Return((results, start_cursor, count))
def Evaluate(job, event, evaluator):
  """Applies an evaluator given a task in the task graph and an event as input.

  This function implements a depth-first search traversal of the task graph
  and applies the `evaluator` given a task and the event input in post-order
  traversal. We start the DFS from the terminal tasks (those that no other
  task depends on) and call the `evaluator` function with a representation of
  the task in the graph, an `event` as input, and an accumulator argument.

  The `evaluator` must be a callable which accepts three arguments:

    - task: an InMemoryTask instance, representing a task in the graph.
    - event: an object whose shape/type is defined by the caller of the
      `Evaluate` function and that the evaluator can handle.
    - accumulator: a mutable dictionary which is valid in the scope of a
      traversal of the graph.

  The `evaluator` must return either None or an iterable of callables which
  take a single argument, which is the accumulator at the end of a traversal.

  Events are free-form but usually are dictionaries which constitute inputs
  that are external to the task graph evaluation. This could model events in
  an event-driven evaluation of tasks, or synthetic inputs to the system. It
  is more important that the `event` information is known to the evaluator
  implementation, and is provided as-is to the evaluator in this function.

  The Evaluate function will keep iterating while there are actions still
  being produced by the evaluator. When there are no more actions to run, the
  Evaluate function will return the most recent traversal's accumulator.
  """
  if job is None:
    raise ValueError('job must not be None.')

  accumulator = {}
  actions = [NoopAction()]
  while actions:
    for action in actions:
      logging.debug('Running action: %s', action)
      # Each action should be a callable which takes the accumulator as an
      # input. We want to run each action in their own transaction as well.
      # This must not be called in a transaction.
      with timing.WallTimeLogger('ExecutionEngine:ActionRunner<%s>' %
                                 (type(action).__name__,)):
        action(accumulator)

    # Clear the actions and accumulator for this traversal.
    del actions[:]
    accumulator.clear()

    # Load the graph transactionally.
    graph = _LoadTaskGraph(job)

    if not graph.tasks:
      logging.debug('Task graph empty for job %s', job.job_id)
      return

    # First get all the "terminal" tasks, and traverse the dependencies in a
    # depth-first-search.
    task_stack = [graph.tasks[task] for task in graph.terminal_tasks]

    # If the stack is empty, we should start at an arbitrary point.
    if not task_stack:
      task_stack = [graph.tasks.values()[0]]

    vertex_states = {}
    while task_stack:
      task = task_stack[-1]
      state = vertex_states.get(task.key, NOT_EVALUATED)
      if state == CHILDREN_PENDING:
        in_memory_task = task.ToInMemoryTask()
        result_actions = evaluator(in_memory_task, event, accumulator)
        if result_actions:
          actions.extend(result_actions)
        vertex_states[task.key] = EVALUATION_DONE
      elif state == NOT_EVALUATED:
        # This vertex is coloured white, we should traverse the dependencies.
        vertex_states[task.key] = CHILDREN_PENDING
        for dependency in task.dependencies:
          if vertex_states.get(dependency, NOT_EVALUATED) == NOT_EVALUATED:
            task_stack.append(graph.tasks[dependency])
      else:
        assert state == EVALUATION_DONE
        task_stack.pop()

  return accumulator
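# A minimal evaluator satisfying the contract in Evaluate's docstring: it
# records every task it visits in the accumulator and emits one follow-up
# action per 'pending' task. The `id` and `payload` attributes on the
# InMemoryTask, and the payload shape, are assumptions for illustration.
def CountingEvaluator(task, event, accumulator):
  accumulator[task.id] = {'event_type': event.get('type')}
  if task.payload.get('status') == 'pending':
    # Actions are callables receiving the end-of-traversal accumulator.
    return [lambda acc: logging.info('tasks seen so far: %s', sorted(acc))]
  return None

# Typical invocation (job obtained elsewhere):
#   accumulator = Evaluate(job, {'type': 'initiate'}, CountingEvaluator)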
def ProcessHistogramSet(histogram_dicts):
  if not isinstance(histogram_dicts, list):
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must be a list of dicts')

  bot_whitelist_future = stored_object.GetAsync(
      add_point_queue.BOT_WHITELIST_KEY)

  histograms = histogram_set.HistogramSet()

  with timing.WallTimeLogger('hs.ImportDicts'):
    histograms.ImportDicts(histogram_dicts)

  with timing.WallTimeLogger('hs.ResolveRelatedHistograms'):
    histograms.ResolveRelatedHistograms()

  with timing.WallTimeLogger('hs.DeduplicateDiagnostics'):
    histograms.DeduplicateDiagnostics()

  if len(histograms) == 0:
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must contain at least one histogram.')

  with timing.WallTimeLogger('hs._LogDebugInfo'):
    _LogDebugInfo(histograms)

  with timing.WallTimeLogger('InlineDenseSharedDiagnostics'):
    InlineDenseSharedDiagnostics(histograms)

  # TODO(eakuefner): Get rid of this.
  # https://github.com/catapult-project/catapult/issues/4242
  with timing.WallTimeLogger('_PurgeHistogramBinData'):
    _PurgeHistogramBinData(histograms)

  with timing.WallTimeLogger('_GetDiagnosticValue calls'):
    master = _GetDiagnosticValue(reserved_infos.MASTERS.name,
                                 histograms.GetFirstHistogram())
    bot = _GetDiagnosticValue(reserved_infos.BOTS.name,
                              histograms.GetFirstHistogram())
    benchmark = _GetDiagnosticValue(reserved_infos.BENCHMARKS.name,
                                    histograms.GetFirstHistogram())
    benchmark_description = _GetDiagnosticValue(
        reserved_infos.BENCHMARK_DESCRIPTIONS.name,
        histograms.GetFirstHistogram(),
        optional=True)

  with timing.WallTimeLogger('_ValidateMasterBotBenchmarkName'):
    _ValidateMasterBotBenchmarkName(master, bot, benchmark)

  with timing.WallTimeLogger('ComputeRevision'):
    suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))
    logging.info('Suite: %s', suite_key.id())

    revision = ComputeRevision(histograms)

    bot_whitelist = bot_whitelist_future.get_result()
    internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)

  # We'll skip the histogram-level sparse diagnostics because we need to
  # handle those with the histograms, below, so that we can properly assign
  # test paths.
  with timing.WallTimeLogger('FindSuiteLevelSparseDiagnostics'):
    suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
        histograms, suite_key, revision, internal_only)

  # TODO(eakuefner): Refactor master/bot computation to happen above this line
  # so that we can replace with a DiagnosticRef rather than a full diagnostic.
  with timing.WallTimeLogger('DeduplicateAndPut'):
    new_guids_to_old_diagnostics = DeduplicateAndPut(
        suite_level_sparse_diagnostic_entities, suite_key, revision)

  with timing.WallTimeLogger('ReplaceSharedDiagnostic calls'):
    for new_guid, old_diagnostic in new_guids_to_old_diagnostics.iteritems():
      histograms.ReplaceSharedDiagnostic(
          new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

  with timing.WallTimeLogger('_BatchHistogramsIntoTasks'):
    tasks = _BatchHistogramsIntoTasks(suite_key.id(), histograms, revision,
                                      benchmark_description)

  with timing.WallTimeLogger('_QueueHistogramTasks'):
    _QueueHistogramTasks(tasks)