def MapCommitPositionsToGitHashes(end_revision,
                                  end_commit_position,
                                  start_commit_position,
                                  repo_url=CHROMIUM_GIT_REPOSITORY_URL,
                                  ref=None):
  """Gets git_hashes of commit_positions between start_commit_position and
  end_commit_position, both ends included.

  Args:
    end_revision (str): Revision of the end commit.
    end_commit_position (int): Commit position of the end commit.
    start_commit_position (int): Commit position of the start commit. It
      cannot be greater than end_commit_position.
    repo_url (str): Url of the git repo. Default to chromium repo url.
    ref (str): Name of the ref.

  Returns:
    dict: A map of commit_positions in range to the corresponding git_hashes.
    For example, return
    {
      1: 'rev1',
      2: 'rev2',
      3: 'rev3'
    }
    if end_commit_position is 3 and start_commit_position is 1.
  """
  assert start_commit_position <= end_commit_position, (
      'start_commit_position {} is greater than end_commit_position {}'.format(
          start_commit_position, end_commit_position))

  git_repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  commit_position_range = end_commit_position - start_commit_position + 1

  logs, _ = git_repo.GetNChangeLogs(end_revision, commit_position_range)
  return dict((log.commit_position, log.revision) for log in logs)
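
# Example (hypothetical helper, not part of the original module): maps a
# culprit's commit position back to its git hash using
# MapCommitPositionsToGitHashes above. The argument names are illustrative;
# end_revision/end_commit_position would typically come from the analyzed
# build.
def _ExampleGetCulpritHash(end_revision, end_commit_position,
                           culprit_commit_position):
  revision_map = MapCommitPositionsToGitHashes(
      end_revision, end_commit_position, culprit_commit_position)
  # E.g. {98: 'hash_98', 99: 'hash_99', 100: end_revision} when the range is
  # 98..100; look up the culprit's hash by its commit position.
  return revision_map.get(culprit_commit_position)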
def GetPossibleRevertInfoFromRevision(revision):
  """Parses the commit message to get info about the reverting/reverted CLs."""
  git_repo = CachedGitilesRepository(
      HttpClientAppengine(),
      'https://chromium.googlesource.com/chromium/src.git')
  change_log = git_repo.GetChangeLog(revision)
  if not change_log:  # pragma: no cover
    return {}

  reverted_revision = change_log.reverted_revision
  if not reverted_revision:
    return {}

  reverted_cl_change_log = git_repo.GetChangeLog(reverted_revision)

  data = {
      'action': 'Reverted',
      'fixed_revision': reverted_revision,
      'fixed_cl_review_url': (reverted_cl_change_log.code_review_url
                              if reverted_cl_change_log else None),
      'fixed_cl_commit_position': (reverted_cl_change_log.commit_position
                                   if reverted_cl_change_log else None),
      'fixing_revision': revision,
      'fixing_cl_review_url': change_log.code_review_url,
      'fixing_cl_commit_position': change_log.commit_position
  }
  return data
def PullChangeLogs(start_revision,
                   end_revision,
                   repo_url=CHROMIUM_GIT_REPOSITORY_URL,
                   ref=None,
                   **kwargs):
  """Pulls change logs for CLs between start_revision and end_revision.

  Args:
    start_revision (str): Start revision of the range, excluded.
    end_revision (str): End revision of the range, included. If end_revision
      is None, pulls all changes after start_revision.
    repo_url (str): Url of the git repo. Default to chromium repo url.
    ref (str): Name of the ref.
    kwargs (dict): Keyword arguments passed as additional params for the query.

  Returns:
    A dict mapping each git hash in the range to its
    common.change_log.ChangeLog:
    {
      'git_hash_revision1': <ChangeLog for that revision>,
      ...
    }
  """
  if not start_revision:
    return {}

  git_repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  change_logs = {}
  change_log_list = git_repo.GetChangeLogs(start_revision, end_revision,
                                           **kwargs)

  for change_log in change_log_list:
    change_logs[change_log.revision] = change_log

  return change_logs
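
# Example (hypothetical helper, for illustration): orders the CLs in a
# regression range by commit position using PullChangeLogs above. Only
# ChangeLog attributes already used in this file (revision, commit_position)
# are assumed; the helper name is illustrative.
def _ExampleOrderChangeLogs(start_revision, end_revision):
  change_logs = PullChangeLogs(start_revision, end_revision)
  # Sort the pulled ChangeLog objects from oldest to newest commit position.
  return sorted(change_logs.values(), key=lambda log: log.commit_position)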
def _GetChromiumWATCHLISTS():
  repo_url = 'https://chromium.googlesource.com/chromium/src'
  source = CachedGitilesRepository(FinditHttpClient(), repo_url).GetSource(
      'WATCHLISTS', 'master')
  if not source:
    return None

  # https://cs.chromium.org/chromium/src/WATCHLISTS is in python.
  definitions = ast.literal_eval(source).get('WATCHLIST_DEFINITIONS')
  return dict((k, v['filepath']) for k, v in definitions.iteritems())
def GetGitBlame(repo_url, revision, touched_file_path, ref=None):
  """Gets git blames of touched_file.

  Args:
    repo_url (str): Url to the repo.
    revision (str): Revision for the change.
    touched_file_path (str): Full path of a file in change_log.
  """
  git_repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  return git_repo.GetBlame(touched_file_path, revision)
def RunImpl(self, parameters):
  analysis = ndb.Key(urlsafe=parameters.analysis_urlsafe_key).get()
  assert analysis, 'Analysis unexpectedly missing!'

  git_repo = CachedGitilesRepository(FinditHttpClient(),
                                     constants.CHROMIUM_GIT_REPOSITORY_URL)
  change_log = git_repo.GetChangeLog(parameters.flakiness.revision)

  flakiness = parameters.flakiness
  data_point = data_point_util.ConvertFlakinessToDataPoint(flakiness)
  data_point.commit_timestamp = change_log.committer.time
  analysis.flakiness_verification_data_points.append(data_point)
  analysis.put()
def _GetGitBlame(repo_info, touched_file_path):
  """Gets git blames of touched_file.

  Args:
    repo_info (dict): The repo_url and revision for the build cycle.
    touched_file_path (str): Full path of a file in change_log.
  """
  if repo_info:
    repo_url = repo_info['repo_url']
    git_repo = CachedGitilesRepository(HttpClientAppengine(), repo_url)
    revision = repo_info['revision']
    return git_repo.GetBlame(touched_file_path, revision)
def GetCommitsInfo(revisions, repo_url=CHROMIUM_GIT_REPOSITORY_URL, ref=None):
  """Gets commit_positions and review urls for revisions."""
  git_repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  cls = {}
  for revision in revisions:
    cls[revision] = {'revision': revision, 'repo_name': 'chromium'}
    change_log = git_repo.GetChangeLog(revision)
    if change_log:
      cls[revision]['commit_position'] = change_log.commit_position
      cls[revision]['url'] = (
          change_log.code_review_url or change_log.commit_url)
      cls[revision]['author'] = change_log.author.email
  return cls
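
# Example (hypothetical helper, for illustration): summarizes a set of suspect
# revisions ordered by commit position. GetCommitsInfo above issues one
# Gitiles change-log request per revision; the revision list is a placeholder.
def _ExampleSummarizeSuspects(suspect_revisions):
  commits_info = GetCommitsInfo(suspect_revisions)
  # Each value looks like:
  # {'revision': 'abc123', 'repo_name': 'chromium',
  #  'commit_position': 123456, 'url': '...', 'author': 'foo@chromium.org'}.
  return sorted(
      commits_info.values(), key=lambda info: info.get('commit_position'))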
def _GetChangedLinesForDependencyRepo(roll, file_path_in_log, line_numbers):
  """Gets changed line numbers for file in failure log.

  Tests if the same lines mentioned in failure log are changed within
  the DEPS roll, if so, return those line numbers.
  """
  roll_repo = CachedGitilesRepository(HttpClientAppengine(), roll['repo_url'])
  old_revision = roll['old_revision']
  new_revision = roll['new_revision']
  old_change_log = roll_repo.GetChangeLog(old_revision)
  old_rev_author_time = old_change_log.author.time
  new_change_log = roll_repo.GetChangeLog(new_revision)
  new_rev_author_time = new_change_log.author.time

  file_change_type = None
  changed_line_numbers = []

  if old_rev_author_time >= new_rev_author_time:
    # If the DEPS roll is downgrade, bail out.
    return file_change_type, changed_line_numbers

  commits_in_roll = roll_repo.GetCommitsBetweenRevisions(
      old_revision, new_revision)

  file_change_type, culprit_commit = _GetChangeTypeAndCulpritCommit(
      file_path_in_log, roll_repo, commits_in_roll)

  if culprit_commit is None:
    # Bail out if no commits touched the file in the log.
    return file_change_type, changed_line_numbers

  if file_change_type == ChangeType.MODIFY:
    # If the file was modified, use the blame information to determine which
    # lines were changed.
    blame = roll_repo.GetBlame(file_path_in_log, culprit_commit)
    if not blame:
      return file_change_type, changed_line_numbers

    for region in blame:
      if line_numbers:
        for line_number in line_numbers:
          if (line_number >= region.start and
              line_number <= region.start + region.count - 1 and
              region.revision in commits_in_roll):
            # One line which appears in the failure log is changed within
            # the DEPS roll.
            changed_line_numbers.append(line_number)

  return file_change_type, changed_line_numbers
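
# Self-contained sketch of the overlap test used above (illustrative only): a
# blame region covers lines [region.start, region.start + region.count - 1],
# so a failure-log line is attributed to the DEPS roll only when it falls in a
# region whose revision is one of the rolled-in commits.
def _ExampleLineInRolledRegion(line_number, region_start, region_count,
                               region_revision, commits_in_roll):
  return (line_number >= region_start and
          line_number <= region_start + region_count - 1 and
          region_revision in commits_in_roll)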
def _GetCommitsBetweenRevisions(start_revision, end_revision):
  """Gets the revisions between start_revision and end_revision.

  Args:
    start_revision (str): The revision for which to get changes after. This
      revision is not included in the returned list.
    end_revision (str): The last revision in the range to return.

  Returns:
    A list of revisions sorted in order by oldest to newest.
  """
  repo = CachedGitilesRepository(HttpClientAppengine(), _CHROMIUM_REPO_URL)
  commits = repo.GetCommitsBetweenRevisions(start_revision, end_revision)
  commits.reverse()
  return commits
def GetCulpritInfo(repo_name, revision):
  """Returns culprit info of the given revision.

  Returns commit position, code-review url, host and change_id.
  """
  # TODO(stgao): get repo url at runtime based on the given repo name.
  # unused arg - pylint: disable=W0612,W0613
  repo = CachedGitilesRepository(
      HttpClientAppengine(),
      'https://chromium.googlesource.com/chromium/src.git')
  change_log = repo.GetChangeLog(revision)
  return {
      'commit_position': change_log.commit_position,
      'code_review_url': change_log.code_review_url,
      'review_server_host': change_log.review_server_host,
      'review_change_id': change_log.review_change_id
  }
def ChangeCommittedWithinTime(revision,
                              repo_url=CHROMIUM_GIT_REPOSITORY_URL,
                              hours=24,
                              ref=None):
  """Returns True if the change was committed within the time given."""
  delta = timedelta(hours=hours)
  git_repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  change_log = git_repo.GetChangeLog(revision)
  culprit_commit_time = change_log.committer.time

  in_time = time_util.GetUTCNow() - culprit_commit_time < delta

  if not in_time:
    logging.info(
        'Culprit %s was committed over %d hours ago, stop auto commit.',
        revision, hours)

  return in_time
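
# Example (hypothetical helper, for illustration): gates an automated action
# on the culprit's age using ChangeCommittedWithinTime above. The 24-hour
# window matches the function's default; the helper name is illustrative.
def _ExampleShouldAutoCommitRevert(culprit_revision):
  # Only proceed when the culprit landed recently enough to act on safely.
  return ChangeCommittedWithinTime(culprit_revision, hours=24)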
def GetCodeReviewInfoForACommit(revision,
                                repo_url=CHROMIUM_GIT_REPOSITORY_URL,
                                ref=None):
  """Returns change info of the given revision.

  Returns commit position, code-review url, host and change_id.
  """
  repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  change_log = repo.GetChangeLog(revision)
  if not change_log:
    return {}
  return {
      'commit_position': change_log.commit_position,
      'code_review_url': change_log.code_review_url,
      'review_server_host': change_log.review_server_host,
      'review_change_id': change_log.review_change_id,
      'author': change_log.author.ToDict(),
      'committer': change_log.committer.ToDict(),
  }
def _RecordCacheStats(build, report):
  """Saves the bot's state at the end of a successful build.

  This function aims to save the following data in the data store:
    - The last revision that the bot synced to under the specific work
      directory (named cache) it used for its local checkout.
    - The latest revision fetched into the bot's local git cache, which is
      shared across all work directories.

  These are saved as commit positions rather than revision hashes for faster
  comparisons when selecting a bot for new tryjobs.
  """
  bot = swarmbot_util.GetBot(build)
  cache_name = swarmbot_util.GetBuilderCacheName(build)
  if bot and cache_name:
    git_repo = CachedGitilesRepository(
        FinditHttpClient(), services_constants.CHROMIUM_GIT_REPOSITORY_URL)

    last_checked_out_revision = report.get('last_checked_out_revision')
    last_checked_out_cp = (
        git_repo.GetChangeLog(last_checked_out_revision).commit_position
        if last_checked_out_revision else None)

    cached_revision = report.get('previously_cached_revision')
    cached_cp = (
        git_repo.GetChangeLog(cached_revision).commit_position
        if cached_revision else None)

    bad_revision = json.loads(
        build.response.get('parameters_json', '{}')).get(
            'properties', {}).get('bad_revision')
    bad_cp = (
        git_repo.GetChangeLog(bad_revision).commit_position
        if bad_revision else None)

    # If the bad_revision is later than the previously cached revision, that
    # means that the bot had to sync with the remote repository, and the local
    # git cache was updated to that revision at least.
    latest_synced_cp = max(bad_cp, cached_cp)

    cache_stats = WfTryBotCache.Get(cache_name)
    cache_stats.AddBot(bot, last_checked_out_cp, latest_synced_cp)

    # TODO(robertocn): Record the time it took to complete the task
    # with a cold or warm cache.
    cache_stats.put()
class PullChangelogPipeline(BasePipeline):
  """A pipeline to pull change log of CLs."""

  # TODO: for files in dependencies (blink, v8, skia, etc), use blame first.
  GIT_REPO = CachedGitilesRepository(
      HttpClientAppengine(),
      'https://chromium.googlesource.com/chromium/src.git')

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, failure_info):
    """
    Args:
      failure_info (dict): Output of pipeline DetectFirstFailurePipeline.run().

    Returns:
      A dict with the following form:
      {
        'git_hash_revision1': common.change_log.ChangeLog.ToDict(),
        ...
      }
    """
    change_logs = {}
    if not failure_info['failed'] or not failure_info['chromium_revision']:
      # Bail out if no failed step or no chromium revision.
      return change_logs

    # Bail out on infra failure.
    if failure_info.get('failure_type') == failure_type.INFRA:
      return change_logs

    for build in failure_info.get('builds', {}).values():
      for revision in build['blame_list']:
        change_log = self.GIT_REPO.GetChangeLog(revision)
        if not change_log:  # pragma: no cover
          raise pipeline.Retry('Failed to get change log for %s' % revision)
        change_logs[revision] = change_log.ToDict()

    return change_logs
def _GetFileContentFromGitiles(report, file_path,
                               revision):  # pragma: no cover.
  """Fetches the content of a specific revision of a file from gitiles.

  Args:
    report (PostsubmitReport): The report that the file is associated with.
    file_path (str): Source absolute path to the file.
    revision (str): The gitiles revision of the file.

  Returns:
    The content of the source file.
  """
  assert file_path.startswith('//'), 'All file paths should start with "//".'
  assert revision, 'A valid revision is required.'

  dependency = _GetMatchedDependencyRepository(report, file_path)
  assert dependency, (
      '%s file does not belong to any dependency repository' % file_path)

  # Calculate the relative path to the root of the dependency repository
  # itself.
  relative_file_path = file_path[len(dependency.path):]
  repo = CachedGitilesRepository(FinditHttpClient(), dependency.project_url)
  return repo.GetSource(relative_file_path, revision)
def RunImpl(self, parameters): """Appends a DataPoint to a MasterFlakeAnalysis.""" analysis_urlsafe_key = parameters.analysis_urlsafe_key analysis = ndb.Key(urlsafe=analysis_urlsafe_key).get() assert analysis, 'Analysis unexpectedly missing' flakiness = parameters.flakiness if flakiness_util.MaximumSwarmingTaskRetriesReached(flakiness): run_swarming_util.ReportSwarmingTaskError(analysis, flakiness.error) analysis.LogError( 'Swarming task ended in error after {} failed attempts. Giving ' 'up'.format(flakiness.failed_swarming_task_attempts)) raise pipeline.Abort() git_repo = CachedGitilesRepository(FinditHttpClient(), constants.CHROMIUM_GIT_REPOSITORY_URL) change_log = git_repo.GetChangeLog(flakiness.revision) data_point = data_point_util.ConvertFlakinessToDataPoint(flakiness) data_point.commit_timestamp = change_log.committer.time analysis.data_points.append(data_point) analysis.put()
def GetCommitsBetweenRevisionsInOrder(start_revision,
                                      end_revision,
                                      repo_url=CHROMIUM_GIT_REPOSITORY_URL,
                                      ascending=True,
                                      ref=None):
  """Gets the revisions between start_revision and end_revision.

  Args:
    start_revision (str): The revision for which to get changes after. This
      revision is not included in the returned list.
    end_revision (str): The last revision in the range to return.
    repo_url (str): Url of the git repo. Default to chromium repo url.
    ascending (bool): Whether the commits should be in chronological order.
    ref (str): Name of the ref.

  Returns:
    A list of revisions sorted in order chronologically.
  """
  repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  commits = repo.GetCommitsBetweenRevisions(start_revision, end_revision)

  if ascending:
    commits.reverse()
  return commits
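
# Example (hypothetical helper, for illustration): fetches a regression range
# in both orders using GetCommitsBetweenRevisionsInOrder above. The range
# excludes start_revision and includes end_revision; the argument names are
# placeholders.
def _ExampleGetRegressionRange(good_revision, bad_revision):
  oldest_first = GetCommitsBetweenRevisionsInOrder(
      good_revision, bad_revision, ascending=True)
  newest_first = GetCommitsBetweenRevisionsInOrder(
      good_revision, bad_revision, ascending=False)
  # The two calls return the same commits in opposite order.
  assert oldest_first == list(reversed(newest_first))
  return oldest_first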
import random

from google.appengine.ext import ndb

from common.findit_http_client import FinditHttpClient
from gae_libs.gitiles.cached_gitiles_repository import CachedGitilesRepository
from infra_api_clients.swarming import swarming_util
from libs import time_util
from model.flake.analysis.flake_culprit import FlakeCulprit
from services import constants
from services import swarming
from services.flake_failure import flake_constants
from services.flake_failure import pass_rate_util
from waterfall import waterfall_config

_GIT_REPO = CachedGitilesRepository(FinditHttpClient(),
                                    constants.CHROMIUM_GIT_REPOSITORY_URL)


def _BotsAvailableForTask(step_metadata):
  """Check if there are available bots for a swarming task's dimensions.

  Args:
    step_metadata (dict): Info about a step to determine the bot's dimensions
      to query Swarming with about bot availability.

  Returns:
    (bool): Whether or not there are enough bots available to trigger the task
      immediately.
  """
  if not step_metadata:
    return False
def _ProcessFullRepositoryData(self, commit, data, full_gs_metadata_dir,
                               builder, build_id):
  # Load the commit log first so that we could fail fast before redo all.
  repo_url = 'https://%s/%s.git' % (commit.host, commit.project)
  change_log = CachedGitilesRepository(FinditHttpClient(),
                                       repo_url).GetChangeLog(commit.id)
  assert change_log is not None, 'Failed to retrieve the commit log'

  # Load the manifest based on the DEPS file.
  # TODO(crbug.com/921714): output the manifest as a build output property.
  manifest = _RetrieveManifest(repo_url, commit.id, 'unix')

  report = PostsubmitReport.Create(
      server_host=commit.host,
      project=commit.project,
      ref=commit.ref,
      revision=commit.id,
      bucket=builder.bucket,
      builder=builder.builder,
      commit_timestamp=change_log.committer.time,
      manifest=manifest,
      summary_metrics=data.get('summaries'),
      build_id=build_id,
      visible=False)
  report.put()

  # Save the file-level, directory-level and line-level coverage data.
  for data_type in ('dirs', 'components', 'files', 'file_shards'):
    sub_data = data.get(data_type)
    if not sub_data:
      continue

    logging.info('Processing %d entries for %s', len(sub_data), data_type)

    actual_data_type = data_type
    if data_type == 'file_shards':
      actual_data_type = 'files'

    def FlushEntries(entries, total, last=False):
      # Flush the data in a batch and release memory.
      if len(entries) < 100 and not (last and entries):
        return entries, total

      ndb.put_multi(entries)
      total += len(entries)
      logging.info('Dumped %d coverage data entries of type %s', total,
                   actual_data_type)
      return [], total

    def IterateOverFileShards(file_shards):
      for file_path in file_shards:
        url = '%s/%s' % (full_gs_metadata_dir, file_path)
        # Download data one by one.
        yield _GetValidatedData(url).get('files', [])

    if data_type == 'file_shards':
      data_iterator = IterateOverFileShards(sub_data)
    else:
      data_iterator = [sub_data]

    entities = []
    total = 0
    component_summaries = []
    for dataset in data_iterator:
      for group_data in dataset:
        if actual_data_type == 'components':
          component_summaries.append({
              'name': group_data['path'],
              'path': group_data['path'],
              'summaries': group_data['summaries'],
          })

        if actual_data_type == 'files' and 'revision' in group_data:
          self._FetchAndSaveFileIfNecessary(report, group_data['path'],
                                            group_data['revision'])

        if actual_data_type == 'files':
          coverage_data = FileCoverageData.Create(
              server_host=commit.host,
              project=commit.project,
              ref=commit.ref,
              revision=commit.id,
              path=group_data['path'],
              bucket=builder.bucket,
              builder=builder.builder,
              data=group_data)
        else:
          coverage_data = SummaryCoverageData.Create(
              server_host=commit.host,
              project=commit.project,
              ref=commit.ref,
              revision=commit.id,
              data_type=actual_data_type,
              path=group_data['path'],
              bucket=builder.bucket,
              builder=builder.builder,
              data=group_data)
        entities.append(coverage_data)
        entities, total = FlushEntries(entities, total, last=False)
      del dataset  # Explicitly release memory.

    FlushEntries(entities, total, last=True)

    if component_summaries:
      component_summaries.sort(key=lambda x: x['path'])
      SummaryCoverageData.Create(
          server_host=commit.host,
          project=commit.project,
          ref=commit.ref,
          revision=commit.id,
          data_type='components',
          path='>>',
          bucket=builder.bucket,
          builder=builder.builder,
          data={
              'dirs': component_summaries,
              'path': '>>'
          }).put()
      component_summaries = []
      logging.info('Summary of all components are saved to datastore.')

  if not _IsReportSuspicious(report):
    report.visible = True
    report.put()

    monitoring.code_coverage_full_reports.increment({
        'host': commit.host,
        'project': commit.project,
        'ref': commit.ref or 'refs/heads/master',
        'builder': '%s/%s/%s' % (builder.project, builder.bucket,
                                 builder.builder),
    })

  monitoring.code_coverage_report_timestamp.set(
      int(time.time()),
      fields={
          'host': commit.host,
          'project': commit.project,
          'ref': commit.ref or 'refs/heads/master',
          'builder': '%s/%s/%s' % (builder.project, builder.bucket,
                                   builder.builder),
          'is_success': report.visible,
      })
def AssignWarmCacheHost(tryjob, cache_name, http_client):
  """Selects the best possible slave for a given tryjob.

  We try to get as many of the following conditions as possible:
    - The bot is available,
    - The bot has the named cache requested by the tryjob,
    - The revision to test has already been fetched to the bot's local git
      cache,
    - The currently checked out revision at the named cache is the closest to
      the revision to test, and if possible earlier than it (so that
      bot_update only moves forward, preferably).

  If a match is found, it is added to the tryjob parameter as a dimension.

  Args:
    tryjob (buildbucket_client.TryJob): The ready-to-be-scheduled job.
    cache_name (str): Previously computed name of the cache to match the
      referred build's builder and master.
    http_client: http_client to use for swarming and gitiles requests.
  """
  if not tryjob.is_swarmbucket_build:
    return
  request_dimensions = dict([x.split(':', 1) for x in tryjob.dimensions])

  bots_with_cache = OnlyAvailable(
      GetAllBotsWithCache(request_dimensions, cache_name, http_client))

  if bots_with_cache:
    # Flake tryjobs check out older code, so there's little benefit in trying
    # to optimize the way we do for non-flake tryjobs. We do however select
    # the bot with the fewest named caches in an effort to avoid unnecessary
    # cache evictions.
    if cache_name and cache_name.endswith(flake_constants.FLAKE_CACHE_SUFFIX):
      selected_bot = _GetBotWithFewestNamedCaches(bots_with_cache)['bot_id']
      tryjob.dimensions.append('id:%s' % selected_bot)
      return

    git_repo = CachedGitilesRepository(http_client,
                                       constants.CHROMIUM_GIT_REPOSITORY_URL)
    # TODO(crbug.com/800107): Pass revision as a parameter.
    revision = (
        tryjob.properties.get('bad_revision') or
        tryjob.properties.get('test_revision'))
    if not revision:
      logging.error('Tryjob %s does not have a specified revision.' % tryjob)
      return
    target_commit_position = git_repo.GetChangeLog(revision).commit_position

    bots_with_rev = _HaveCommitPositionInLocalGitCache(bots_with_cache,
                                                       target_commit_position)
    if not bots_with_rev:
      selected_bot = _GetBotWithFewestNamedCaches(bots_with_cache)['bot_id']
      tryjob.dimensions.append('id:' + selected_bot)
      return

    bots_with_latest_earlier_rev_checked_out = _ClosestEarlier(
        bots_with_rev, cache_name, target_commit_position)
    if bots_with_latest_earlier_rev_checked_out:
      tryjob.dimensions.append(
          'id:' + bots_with_latest_earlier_rev_checked_out['bot_id'])
      return

    bots_with_earliest_later_rev_checked_out = _ClosestLater(
        bots_with_rev, cache_name, target_commit_position)
    if bots_with_earliest_later_rev_checked_out:
      tryjob.dimensions.append(
          'id:' + bots_with_earliest_later_rev_checked_out['bot_id'])
      return

    selected_bot = _GetBotWithFewestNamedCaches(bots_with_rev)['bot_id']
    tryjob.dimensions.append('id:' + selected_bot)
    return

  else:
    idle_bots = OnlyAvailable(
        GetBotsByDimension(request_dimensions, http_client))
    if idle_bots:
      selected_bot = _GetBotWithFewestNamedCaches(idle_bots)['bot_id']
      tryjob.dimensions.append('id:' + selected_bot)
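
# Self-contained sketch of the dimension handling above (illustrative values
# only): Swarming dimensions are 'key:value' strings, split on the first ':'
# so values may themselves contain colons, and the chosen bot is pinned by
# appending an 'id:<bot_id>' dimension.
def _ExampleDimensionHandling():
  dimensions = ['os:Ubuntu-16.04', 'pool:luci.flex.try', 'caches:builder_x']
  request_dimensions = dict([x.split(':', 1) for x in dimensions])
  assert request_dimensions['os'] == 'Ubuntu-16.04'
  dimensions.append('id:' + 'swarm-bot-123')  # Pin the selected bot.
  return dimensions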
from model import result_status
from model.flake.flake_culprit import FlakeCulprit
from model.flake.flake_try_job import FlakeTryJob
from model.flake.flake_try_job_data import FlakeTryJobData
from waterfall.flake import confidence
from waterfall.flake import lookback_algorithm
from waterfall.flake.lookback_algorithm import NormalizedDataPoint
from waterfall.flake.process_flake_try_job_result_pipeline import (
    ProcessFlakeTryJobResultPipeline)
from waterfall.flake.schedule_flake_try_job_pipeline import (
    ScheduleFlakeTryJobPipeline)
from waterfall.flake.update_flake_bug_pipeline import UpdateFlakeBugPipeline
from waterfall.monitor_try_job_pipeline import MonitorTryJobPipeline

_GIT_REPO = CachedGitilesRepository(
    HttpClientAppengine(),
    'https://chromium.googlesource.com/chromium/src.git')


def CreateCulprit(revision, commit_position, confidence_score,
                  repo_name='chromium'):
  """Sets culprit information."""
  change_log = _GIT_REPO.GetChangeLog(revision)
  if change_log:
    url = change_log.code_review_url or change_log.commit_url
    culprit = FlakeCulprit.Create(repo_name, revision, commit_position, url,
                                  confidence_score)
  else:
def GetAuthor(revision, repo_url=CHROMIUM_GIT_REPOSITORY_URL, ref=None):
  git_repo = CachedGitilesRepository(FinditHttpClient(), repo_url, ref)
  change_log = git_repo.GetChangeLog(revision)
  return change_log.author if change_log else None