def AnalysisSerializer(task, _, accumulator):
  analysis_results = accumulator.setdefault(task.id, {})
  read_option_template = task.payload.get('read_option_template', {})
  graph_json_options = read_option_template.get('graph_json_options', {})
  metric = None
  if read_option_template.get('mode') == 'histogram_sets':
    metric = read_option_template.get('benchmark')
  elif read_option_template.get('mode') == 'graph_json':
    metric = graph_json_options.get('chart')
  analysis_results.update({
      'changes': [
          change_module.ReconstituteChange(change)
          for change in task.payload.get('changes', [])
      ],
      'comparison_mode': task.payload.get('comparison_mode'),
      'comparisons': task.payload.get('comparisons', []),
      'culprits': task.payload.get('culprits', []),
      'metric': metric,
      'result_values': task.payload.get('result_values', [])
  })
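# A minimal, self-contained sketch of the mode-dependent metric selection
# above. `_StubTask` and `_SelectMetric` are hypothetical stand-ins for
# illustration only; they are not part of the Pinpoint codebase.
import collections

_StubTask = collections.namedtuple('_StubTask', ['id', 'payload'])


def _SelectMetric(task):
  # Mirrors the branch in AnalysisSerializer: histogram_sets reads the
  # benchmark name, graph_json reads the chart name.
  read_option_template = task.payload.get('read_option_template', {})
  if read_option_template.get('mode') == 'histogram_sets':
    return read_option_template.get('benchmark')
  if read_option_template.get('mode') == 'graph_json':
    return read_option_template.get('graph_json_options', {}).get('chart')
  return None


assert _SelectMetric(
    _StubTask('t0', {
        'read_option_template': {
            'mode': 'histogram_sets',
            'benchmark': 'speedometer2'
        }
    })) == 'speedometer2'
assert _SelectMetric(
    _StubTask('t1', {
        'read_option_template': {
            'mode': 'graph_json',
            'graph_json_options': {'chart': 'Total'}
        }
    })) == 'Total'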
def __call__(self, task, event, _):
  # Outline:
  #  - Check build status payload.
  #  - If successful, update the task payload with status and relevant
  #    information, propagate information into the accumulator.
  #  - If unsuccessful:
  #    - Retry if the failure is a retryable error (update payload with
  #      retry information).
  #    - Fail if the failure is non-retryable or we've exceeded retries.
  if event.type == 'update':
    change = change_module.ReconstituteChange(task.payload.get('change'))
    return [UpdateBuildStatusAction(self.job, task, change, event)]
  return None
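# A tiny sketch of the dispatch contract above: only 'update' events yield an
# action list; any other event type returns None so other evaluators can
# handle it. `_StubEvent` and `_HandleBuildEvent` are hypothetical stand-ins,
# not the real Pinpoint types.
import collections

_StubEvent = collections.namedtuple('_StubEvent', ['type'])


def _HandleBuildEvent(event):
  if event.type == 'update':
    return ['UpdateBuildStatusAction']  # stands in for the real action list
  return None


assert _HandleBuildEvent(_StubEvent('update')) == ['UpdateBuildStatusAction']
assert _HandleBuildEvent(_StubEvent('initiate')) is None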
def __call__(self, task, _, accumulator):
  # Outline:
  #  - If the task is still pending, this means this is the first time we're
  #    encountering the task in an evaluation. Set up the payload data to
  #    include the full range of commits, so that we load it once and have it
  #    ready, and emit an action to mark the task ongoing.
  #
  #  - If the task is ongoing, gather all the dependency data (both results
  #    and status) and see whether we have enough data to determine the next
  #    action. We have three main cases:
  #
  #    1. We cannot detect a significant difference between the results from
  #       two different CLs. We call this the NoReproduction case.
  #
  #    2. We do not have enough confidence that there's a difference. We call
  #       this the Indeterminate case.
  #
  #    3. We have enough confidence that there's a difference between any two
  #       ordered changes. We call this the SignificantChange case.
  #
  #  - Delegate the implementation to handle the independent cases for each
  #    change point we find in the CL continuum.
  if task.status == 'pending':
    return [PrepareCommits(self.job, task)]

  all_changes = None
  actions = []
  if 'changes' not in task.payload:
    all_changes = [
        change_module.Change(
            commits=[
                change_module.Commit(
                    repository=commit.get('repository'),
                    git_hash=commit.get('git_hash'))
            ],
            patch=task.payload.get('pinned_change'))
        for commit in task.payload.get('commits', [])
    ]
    task.payload.update({
        'changes': [change.AsDict() for change in all_changes],
    })
    actions.append(UpdateTaskPayloadAction(self.job, task))
  else:
    # We need to reconstitute the Change instances from the dicts we've
    # stored in the payload.
    all_changes = [
        change_module.ReconstituteChange(change)
        for change in task.payload.get('changes')
    ]

  if task.status == 'ongoing':
    # TODO(dberris): Validate and fail gracefully instead of asserting?
    assert 'commits' in task.payload, ('Programming error, need commits to '
                                       'proceed!')

    # Collect all the dependency task data and analyse the results.
    # Group them by change.
    # Order them by appearance in the CL range.
    # Also count the status per CL (failed, ongoing, etc.).
    deps = set(task.dependencies)
    results_by_change = collections.defaultdict(list)
    status_by_change = collections.defaultdict(dict)
    changes_with_data = set()
    changes_by_status = collections.defaultdict(set)
    associated_results = [
        (change_module.ReconstituteChange(t.get('change')), t.get('status'),
         t.get('result_values'))
        for dep, t in accumulator.items()
        if dep in deps
    ]
    for change, status, result_values in associated_results:
      if result_values:
        filtered_results = [r for r in result_values if r is not None]
        if filtered_results:
          results_by_change[change].append(filtered_results)
      status_by_change[change].update({
          status: status_by_change[change].get(status, 0) + 1,
      })
      changes_by_status[status].add(change)
      changes_with_data.add(change)

    # If the dependencies have converged into a single status, we can make
    # decisions on the terminal state of the bisection.
    if len(changes_by_status) == 1 and changes_with_data:

      # Check whether all dependencies completed but we have no data from
      # any of them.
      if changes_by_status.get('completed') == changes_with_data:
        changes_with_empty_results = [
            change for change in changes_with_data
            if not results_by_change.get(change)
        ]
        if changes_with_empty_results:
          task.payload.update({
              'errors':
                  task.payload.get('errors', []) + [{
                      'reason': 'BisectionFailed',
                      'message': ('We did not find any results from '
                                  'successful test runs.')
                  }]
          })
          return [CompleteExplorationAction(self.job, task, 'failed')]

      # Check whether all the dependencies had the tests fail consistently.
      elif changes_by_status.get('failed') == changes_with_data:
        task.payload.update({
            'errors':
                task.payload.get('errors', []) + [{
                    'reason': 'BisectionFailed',
                    'message': 'All attempts in all dependencies failed.'
                }]
        })
        return [CompleteExplorationAction(self.job, task, 'failed')]

      # If they're all pending or ongoing, then we don't do anything yet.
      else:
        return actions

    # We want to reduce the list of ordered changes to only the ones that
    # have data available.
    change_index = {change: index for index, change in enumerate(all_changes)}
    ordered_changes = [c for c in all_changes if c in changes_with_data]

    # From here we can then do the analysis on a pairwise basis, as we're
    # going through the list of Change instances we have data for.
    # NOTE: A lot of this algorithm is already in pinpoint/models/job_state.py
    # which we're adapting.
    def Compare(a, b):
      # This is the comparison function which determines whether the samples
      # we have from the two changes (a and b) are statistically significant.
      if a is None or b is None:
        return None

      if 'pending' in status_by_change[a] or 'pending' in status_by_change[b]:
        return compare.PENDING

      # NOTE: Here we're attempting to scale the provided comparison
      # magnitude threshold by the larger inter-quartile range (a measure of
      # dispersion, simply computed as the 75th percentile minus the 25th
      # percentile). The reason we're doing this is so that we can scale the
      # tolerance according to the noise inherent in the measurements --
      # i.e. noisier measurements will require a larger difference before we
      # consider it statistically significant.
      values_for_a = tuple(itertools.chain(*results_by_change[a]))
      values_for_b = tuple(itertools.chain(*results_by_change[b]))
      if not values_for_a:
        return None
      if not values_for_b:
        return None

      max_iqr = max(
          math_utils.Iqr(values_for_a), math_utils.Iqr(values_for_b), 0.001)
      comparison_magnitude = task.payload.get('comparison_magnitude',
                                              1.0) / max_iqr
      attempts = (len(values_for_a) + len(values_for_b)) // 2
      result = compare.Compare(values_for_a, values_for_b, attempts,
                               'performance', comparison_magnitude)
      return result.result

    def DetectChange(change_a, change_b):
      # We return None if the comparison determines that the result is
      # inconclusive. This is required by the exploration.Speculate contract.
      comparison = Compare(change_a, change_b)
      if comparison == compare.UNKNOWN:
        return None
      return comparison == compare.DIFFERENT

    changes_to_refine = []

    def CollectChangesToRefine(a, b):
      # Here we're collecting changes that need refinement, which happens
      # when two changes, when compared, yield the "unknown" result.
      attempts_for_a = sum(status_by_change[a].values())
      attempts_for_b = sum(status_by_change[b].values())

      # Grow the attempts of both changes by 50% every time we need more
      # attempts. This number is arbitrary, and we should probably use
      # something like a Fibonacci sequence when scaling attempt counts.
      new_attempts_size_a = min(
          attempts_for_a + (attempts_for_a // 2),
          task.payload.get('analysis_options', {}).get('max_attempts', 100))
      new_attempts_size_b = min(
          attempts_for_b + (attempts_for_b // 2),
          task.payload.get('analysis_options', {}).get('max_attempts', 100))

      # Only refine if we haven't hit the maximum attempt count yet, i.e.
      # the capped new size is still larger than the current one.
      if new_attempts_size_a > attempts_for_a:
        changes_to_refine.append((a, new_attempts_size_a))
      if new_attempts_size_b > attempts_for_b:
        changes_to_refine.append((b, new_attempts_size_b))

    def FindMidpoint(a, b):
      # Here we use the (very simple) midpoint finding algorithm given that
      # we already have the full range of commits to bisect through.
      a_index = change_index[a]
      b_index = change_index[b]
      subrange = all_changes[a_index:b_index + 1]
      return None if len(subrange) <= 2 else subrange[len(subrange) // 2]

    # We have a striding iterable, which will give us the before, current,
    # and after for a given index in the iterable.
    def SlidingTriple(iterable):
      """s -> (None, s0, s1), (s0, s1, s2), (s1, s2, s3), ..."""
      p, c, n = itertools.tee(iterable, 3)
      p = itertools.chain([None], p)
      n = itertools.chain(itertools.islice(n, 1, None), [None])
      return itertools.izip(p, c, n)

    # This is a comparison between the values at a change and the values at
    # the previous change and the next change.
    comparisons = [{
        'prev': Compare(p, c),
        'next': Compare(c, n),
    } for (p, c, n) in SlidingTriple(ordered_changes)]

    # Collect the result values for each change with values.
    result_values = [
        list(itertools.chain(*results_by_change.get(change, [])))
        for change in ordered_changes
    ]
    if task.payload.get('comparisons') != comparisons or task.payload.get(
        'result_values') != result_values:
      task.payload.update({
          'comparisons': comparisons,
          'result_values': result_values,
      })
      actions.append(UpdateTaskPayloadAction(self.job, task))

    if len(ordered_changes) < 2:
      # We do not have enough data yet to determine whether we should do
      # anything.
      return actions

    additional_changes = exploration.Speculate(
        ordered_changes,
        change_detected=DetectChange,
        on_unknown=CollectChangesToRefine,
        midpoint=FindMidpoint,
        levels=_DEFAULT_SPECULATION_LEVELS)

    # At this point we can collect the actions to extend the task graph
    # based on the results of the speculation, only if the changes don't
    # have any more associated pending/ongoing work.
    min_attempts = task.payload.get('analysis_options',
                                    {}).get('min_attempts', 10)
    actions += [
        RefineExplorationAction(self.job, task, change, new_size)
        for change, new_size in itertools.chain(
            [(c, min_attempts) for _, c in additional_changes],
            [(c, a) for c, a in changes_to_refine],
        )
        if not bool({'pending', 'ongoing'} & set(status_by_change[change]))
    ]

    # Here we collect the points where we've found the changes.
    def Pairwise(iterable):
      """s -> (s0, s1), (s1, s2), (s2, s3), ..."""
      a, b = itertools.tee(iterable)
      next(b, None)
      return itertools.izip(a, b)

    task.payload.update({
        'culprits': [(a.AsDict(), b.AsDict())
                     for a, b in Pairwise(ordered_changes)
                     if DetectChange(a, b)],
    })
    can_complete = not bool(set(changes_by_status) - {'failed', 'completed'})
    if not actions and can_complete:
      # Mark this operation complete, storing the differences we can compute.
      actions = [CompleteExplorationAction(self.job, task, 'completed')]
    return actions
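# A standalone sketch of the two windowing helpers used above, written with
# the Python 3 `zip` for easy testing (the production code targets Python 2
# and uses `itertools.izip`). Given data for changes [c0, c1, c2],
# SlidingTriple drives the per-change 'prev'/'next' comparisons and Pairwise
# drives culprit detection between adjacent changes.
import itertools


def SlidingTriple(iterable):
  """s -> (None, s0, s1), (s0, s1, s2), ..., (s[n-1], s[n], None)"""
  p, c, n = itertools.tee(iterable, 3)
  p = itertools.chain([None], p)
  n = itertools.chain(itertools.islice(n, 1, None), [None])
  return zip(p, c, n)


def Pairwise(iterable):
  """s -> (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  next(b, None)
  return zip(a, b)


assert list(SlidingTriple(['c0', 'c1', 'c2'])) == [
    (None, 'c0', 'c1'), ('c0', 'c1', 'c2'), ('c1', 'c2', None)
]
assert list(Pairwise(['c0', 'c1', 'c2'])) == [('c0', 'c1'), ('c1', 'c2')]


# The IQR-based tolerance scaling from Compare() in plain arithmetic: noisier
# samples (larger IQR) shrink the effective comparison magnitude, so a bigger
# difference is needed before we call it significant; the 0.001 floor guards
# against division by (near-)zero. The numbers below are made up.
def ScaledMagnitude(comparison_magnitude, iqr_a, iqr_b):
  return comparison_magnitude / max(iqr_a, iqr_b, 0.001)


assert ScaledMagnitude(1.0, 0.5, 2.0) == 0.5     # noisy: harder to flag
assert ScaledMagnitude(1.0, 0.0, 0.0) == 1000.0  # near-zero noise: floor kicks in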
def __call__(self, _):
  start_change = change_module.ReconstituteChange(
      self.task.payload['start_change'])
  end_change = change_module.ReconstituteChange(
      self.task.payload['end_change'])
  try:
    # We're storing this once, so that we don't need to always get this when
    # working with the individual commits. This reduces our reliance on
    # datastore operations throughout the course of handling the culprit
    # finding process.
    #
    # TODO(dberris): Expand the commits into the full table of dependencies?
    # Because every commit in the chromium repository is likely to be
    # building against different versions of the dependencies (v8, skia,
    # etc.), we'd need to expand the concept of a changelist (CL, or Change
    # in the Pinpoint codebase) so that we know which versions of the
    # dependencies to use in specific CLs. Once we have this, we might be
    # able to operate cleanly on Change instances instead of raw commits.
    #
    # TODO(dberris): Model the "merge-commit" like nature of auto-roll CLs
    # by allowing the preparation action to model the non-linearity of the
    # history. This means we'll need a concept of levels, where changes in a
    # single repository history (the main one) operate linearly at a higher
    # level, and descending into a roll means exploring a lower level in the
    # linear history. This is similar to the following diagram:
    #
    #   main -> m0 -> m1 -> m2 -> roll0 -> m3 -> ...
    #                                |
    #   dependency ................ +-> d0 -> d1
    #
    # Ideally we'll already have this expanded before we go ahead and
    # perform a bisection, to amortise the cost of making requests to
    # back-end services for this kind of information in tight loops.
    commits = change_module.Commit.CommitRange(start_change.base_commit,
                                               end_change.base_commit)
    self.task.payload.update({
        'commits': [
            collections.OrderedDict(
                [('repository', start_change.base_commit.repository),
                 ('git_hash', start_change.base_commit.git_hash)])
        ] + [
            collections.OrderedDict(
                [('repository', start_change.base_commit.repository),
                 ('git_hash', commit['commit'])])
            for commit in reversed(commits)
        ]
    })
    task_module.UpdateTask(
        self.job, self.task.id, new_state='ongoing', payload=self.task.payload)
  except gitiles_service.NotFoundError as e:
    # TODO(dberris): We need to be more resilient to intermittent failures
    # from the Gitiles service here.
    self.task.payload.update({
        'errors':
            self.task.payload.get('errors', []) + [{
                'reason': 'GitilesFetchError',
                'message': e.message
            }]
    })
    task_module.UpdateTask(
        self.job, self.task.id, new_state='failed', payload=self.task.payload)
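# The shape of the 'commits' payload written above, sketched with made-up
# repository names and hashes: the start change's base commit comes first,
# followed by the range results oldest-to-newest (this sketch assumes
# CommitRange returns commits newest-first, which is why the production code
# applies `reversed(...)`). Nothing here is a real commit.
import collections

start = collections.OrderedDict(
    [('repository', 'chromium'), ('git_hash', 'aaaa000')])
range_newest_first = [{'commit': 'cccc222'}, {'commit': 'bbbb111'}]
commits = [start] + [
    collections.OrderedDict(
        [('repository', 'chromium'), ('git_hash', c['commit'])])
    for c in reversed(range_newest_first)
]
assert [c['git_hash'] for c in commits] == ['aaaa000', 'bbbb111', 'cccc222']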
def _FormatAndPostBugCommentOnComplete(self):
  logging.debug('Processing outputs.')
  if self._IsTryJob():
    # There is no comparison metric.
    title = '<b>%s Job complete. See results below.</b>' % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        labels=['Pinpoint-Tryjob-Completed'],
        _retry_options=RETRY_OPTIONS)
    return

  # There is a comparison metric.
  differences = []
  result_values = {}
  if not self.use_execution_engine:
    differences = self.state.Differences()
    for change_a, change_b in differences:
      result_values.setdefault(change_a, self.state.ResultValues(change_a))
      result_values.setdefault(change_b, self.state.ResultValues(change_b))
  else:
    logging.debug('Execution Engine: Finding culprits.')
    context = task_module.Evaluate(
        self, event_module.SelectEvent(),
        evaluators.Selector(
            event_type='select',
            include_keys={'culprits', 'change', 'result_values'}))
    differences = [
        (change_module.ReconstituteChange(change_a),
         change_module.ReconstituteChange(change_b))
        for change_a, change_b in context.get('performance_bisection',
                                              {}).get('culprits', [])
    ]
    result_values = {
        change_module.ReconstituteChange(v.get('change')):
            v.get('result_values')
        for v in context.values()
        if 'change' in v and 'result_values' in v
    }

  if not differences:
    title = "<b>%s Couldn't reproduce a difference.</b>" % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        labels=['Pinpoint-No-Repro'],
        _retry_options=RETRY_OPTIONS)
    return

  # Collect the result values for each of the differences.
  difference_details = []
  commit_infos = []
  commits_with_deltas = {}
  for change_a, change_b in differences:
    if change_b.patch:
      commit = change_b.patch
    else:
      commit = change_b.last_commit
    commit_info = commit.AsDict()

    values_a = result_values[change_a]
    values_b = result_values[change_b]
    difference = _FormatDifferenceForBug(commit_info, values_a, values_b,
                                         self.state.metric)
    difference_details.append(difference)
    commit_infos.append(commit_info)
    if values_a and values_b:
      mean_delta = job_state.Mean(values_b) - job_state.Mean(values_a)
      commits_with_deltas[commit.id_string] = (mean_delta, commit_info)

  deferred.defer(
      _UpdatePostAndMergeDeferred,
      difference_details,
      commit_infos,
      list(commits_with_deltas.values()),
      self.bug_id,
      self.tags,
      self.url,
      _retry_options=RETRY_OPTIONS)
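# The mean delta recorded per culprit commit above, in plain arithmetic: a
# positive delta means the metric increased at change_b relative to change_a.
# `Mean` here is a local stand-in for job_state.Mean, and the sample values
# are made up for illustration.
def Mean(values):
  return float(sum(values)) / len(values)


values_a = [10.0, 12.0, 11.0]  # samples before the suspected commit
values_b = [15.0, 16.0, 14.0]  # samples after the suspected commit
mean_delta = Mean(values_b) - Mean(values_a)
assert mean_delta == 4.0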
def _FormatAndPostBugCommentOnComplete(self):
  logging.debug('Processing outputs.')
  if self._IsTryJob():
    # There is no comparison metric.
    title = '<b>%s Job complete. See results below.</b>' % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        project=self.project,
        labels=['Pinpoint-Tryjob-Completed'],
        _retry_options=RETRY_OPTIONS)
    return

  # There is a comparison metric.
  differences = []
  result_values = {}
  changes_examined = None
  if not self.use_execution_engine:
    differences = self.state.Differences()
    for change_a, change_b in differences:
      result_values.setdefault(change_a, self.state.ResultValues(change_a))
      result_values.setdefault(change_b, self.state.ResultValues(change_b))
    changes_examined = self.state.ChangesExamined()
  else:
    logging.debug('Execution Engine: Finding culprits.')
    context = task_module.Evaluate(
        self, event_module.SelectEvent(),
        evaluators.Selector(
            event_type='select',
            include_keys={'culprits', 'change', 'result_values'}))
    differences = [
        (change_module.ReconstituteChange(change_a),
         change_module.ReconstituteChange(change_b))
        for change_a, change_b in context.get('performance_bisection',
                                              {}).get('culprits', [])
    ]
    result_values = {
        change_module.ReconstituteChange(v.get('change')):
            v.get('result_values')
        for v in context.values()
        if 'change' in v and 'result_values' in v
    }

  if not differences:
    # When we cannot find a difference, we want to not only update the issue
    # with that (minimal) information but also automatically mark the issue
    # WontFix. This is based on information we've gathered in production:
    # most issues where Pinpoint cannot reproduce the difference invariably
    # end up as "Unconfirmed" with very little follow-up.
    title = "<b>%s Couldn't reproduce a difference.</b>" % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        project=self.project,
        labels=['Pinpoint-No-Repro'],
        status='WontFix',
        _retry_options=RETRY_OPTIONS)
    return

  # Collect the result values for each of the differences.
  bug_update_builder = job_bug_update.DifferencesFoundBugUpdateBuilder(
      self.state.metric)
  bug_update_builder.SetExaminedCount(changes_examined)
  for change_a, change_b in differences:
    if change_b.patch:
      commit = change_b.patch
    else:
      commit = change_b.last_commit
    values_a = result_values[change_a]
    values_b = result_values[change_b]
    bug_update_builder.AddDifference(commit, values_a, values_b)

  deferred.defer(
      job_bug_update.UpdatePostAndMergeDeferred,
      bug_update_builder,
      self.bug_id,
      self.tags,
      self.url,
      self.project,
      _retry_options=RETRY_OPTIONS)
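# The commit picked for each difference above, in miniature: a patched change
# reports its patch, otherwise its last commit. `_StubChange` and
# `_CommitForDifference` are hypothetical stand-ins for the real
# change_module.Change attributes used by both versions of this method.
import collections

_StubChange = collections.namedtuple('_StubChange', ['patch', 'last_commit'])


def _CommitForDifference(change_b):
  return change_b.patch if change_b.patch else change_b.last_commit


assert _CommitForDifference(
    _StubChange('some-patch', 'deadbeef')) == 'some-patch'
assert _CommitForDifference(_StubChange(None, 'deadbeef')) == 'deadbeef'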
def __call__(self, task, event, context):
  # First we delegate to the task-specific serializers, and have the
  # domain-aware transformers canonicalise the data in the context. We then
  # do a dictionary merge following a simple protocol for editing a single
  # context. This way the transformers can output a canonical set of
  # transformations to build up the (global) context.
  local_context = {}
  super(Serializer, self).__call__(task, event, local_context)

  # What we expect to see in the local context is data in the following
  # form:
  #
  #   {
  #      # The 'state' key is required to identify to which change and which
  #      # state we should be performing the actions.
  #      'state': {
  #         'change': {...}
  #         'quest': <string>
  #
  #         # In the quest-based system, we end up with different
  #         # "execution" details, which come in "quest" order. In the
  #         # task-based evaluation model, we use the 'index' in the
  #         # 'add_details' sub-object to identify the index in the details.
  #         'add_execution': {
  #             'add_details': {
  #                 'index': <int>
  #                 ...
  #             }
  #             ...
  #         }
  #
  #         # This allows us to accumulate the resulting values we encounter
  #         # associated with the change.
  #         'append_result_values': [<float>]
  #
  #         # This allows us to set the comparison result for this change in
  #         # the context of other changes.
  #         'set_comparison': {
  #             'next': <string|None>,
  #             'prev': <string|None>,
  #         }
  #      }
  #
  #      # If we see the 'order_changes' key in the local context, then that
  #      # means we can sort the states according to the changes as they
  #      # appear in the embedded 'changes' list.
  #      'order_changes': {
  #          'changes': [..]
  #      }
  #
  #      # If we see the 'set_parameters' key in the local context, then we
  #      # can set the overall parameters we're looking to compare and
  #      # convey in the results.
  #      'set_parameters': {
  #          'comparison_mode': <string>
  #          'metric': <string>
  #      }
  #   }
  #
  # At this point we process the context to update the global context
  # following the protocol defined above.
  if 'state' in local_context:
    modification = local_context['state']
    states = context.setdefault('state', [])
    quests = context.setdefault('quests', [])

    # We need to find the existing state which matches the quest and the
    # change. If we don't find one, we create the first state entry for
    # that.
    state_index = None
    change = modification.get('change')
    for index, state in enumerate(states):
      if state.get('change') == change:
        state_index = index
        break

    if state_index is None:
      states.append({'attempts': [{'executions': []}], 'change': change})
      state_index = len(states) - 1

    quest = modification.get('quest')
    try:
      quest_index = quests.index(quest)
    except ValueError:
      quests.append(quest)
      quest_index = len(quests) - 1

    add_execution = modification.get('add_execution')
    append_result_values = modification.get('append_result_values')
    attempt_index = modification.get('index', 0)
    state = states[state_index]
    if add_execution:
      attempts = state['attempts']
      while len(attempts) < attempt_index + 1:
        attempts.append({'executions': []})
      executions = state['attempts'][attempt_index]['executions']
      while len(executions) < quest_index + 1:
        executions.append(None)
      executions[quest_index] = dict(add_execution)

    if append_result_values:
      state.setdefault('result_values', []).extend(append_result_values)

  if 'order_changes' in local_context:
    # Here we'll sort the states according to their order of appearance in
    # the 'order_changes' list.
    states = context.get('state', [])
    if states:
      state_changes = {
          change_module.ReconstituteChange(state.get('change'))
          for state in states
      }
      order_changes = local_context.get('order_changes', {})
      all_changes = order_changes.get('changes', [])
      comparisons = order_changes.get('comparisons', [])
      result_values = order_changes.get('result_values', [])
      change_index = {
          change: index for index, change in enumerate(
              known_change for known_change in all_changes
              if known_change in state_changes)
      }
      ordered_states = [None] * len(states)
      for state in states:
        index = change_index.get(
            change_module.ReconstituteChange(state.get('change')))
        if index is not None:
          ordered_states[index] = state

      # Merge in the comparisons as they appear for the ordered_states.
      for state, comparison, result in itertools.izip_longest(
          ordered_states, comparisons or [], result_values or []):
        if state is None:
          continue
        if comparison is not None:
          state['comparisons'] = comparison
        state['result_values'] = result or []
      context['state'] = ordered_states
      context['difference_count'] = len(order_changes.get('culprits', []))

    # At this point set the default comparisons between two adjacent states
    # which don't have an associated comparison yet to 'pending'.
    states = context.get('state', [])
    for index, state in enumerate(states):
      comparisons = state.get('comparisons')
      if comparisons is None:
        state['comparisons'] = {
            'prev': None if index == 0 else 'pending',
            'next': None if index + 1 == len(states) else 'pending',
        }

  if 'set_parameters' in local_context:
    modification = local_context.get('set_parameters')
    context['comparison_mode'] = modification.get('comparison_mode')
    context['metric'] = modification.get('metric')
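# A worked sketch of the 'state' merge protocol documented above: two local
# contexts for the same change but different quests merge into a single state
# entry whose attempt holds executions in quest order. `MergeState` is a
# condensed, hypothetical rendering of the 'state' branch, and the dicts are
# hand-built stand-ins for what the task-specific serializers emit.
def MergeState(context, modification):
  states = context.setdefault('state', [])
  quests = context.setdefault('quests', [])
  change = modification.get('change')
  state = next((s for s in states if s.get('change') == change), None)
  if state is None:
    state = {'attempts': [{'executions': []}], 'change': change}
    states.append(state)
  quest = modification.get('quest')
  if quest not in quests:
    quests.append(quest)
  quest_index = quests.index(quest)
  executions = state['attempts'][modification.get('index', 0)]['executions']
  while len(executions) < quest_index + 1:
    executions.append(None)
  executions[quest_index] = dict(modification['add_execution'])


context = {}
MergeState(context, {'change': {'c': 1}, 'quest': 'Build',
                     'add_execution': {'completed': True}})
MergeState(context, {'change': {'c': 1}, 'quest': 'Test',
                     'add_execution': {'completed': False}})
assert context['quests'] == ['Build', 'Test']
assert len(context['state']) == 1  # same change -> one state entry
assert context['state'][0]['attempts'][0]['executions'] == [
    {'completed': True}, {'completed': False}
]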