# NOTE: these functions are excerpted from a larger code base. Project-internal
# helpers (vcs_map, db, repository, microannotate_utils, taskcluster,
# clone_gecko_dev, download_mapfile, slugId, list_commits, trigger_task,
# get_commits_to_ignore, find_bug_fixing_commits, compress_file) and constants
# (BUG_INTRODUCING_COMMITS_DB, MAX_MODIFICATION_NUMBER) are assumed to be
# provided by the surrounding project; only standard-library and third-party
# imports are spelled out here.
import argparse
import concurrent.futures
import itertools
import logging
import os

from pydriller import GitRepository
from tqdm import tqdm

logger = logging.getLogger(__name__)


# Method excerpted from its owning class (hence the ``self`` parameter).
def init_mapping(self):
    logger.info("Downloading Mercurial <-> git mapping file...")
    vcs_map.download_mapfile()

    (
        self.tokenized_git_to_mercurial,
        self.mercurial_to_tokenized_git,
    ) = microannotate_utils.get_commit_mapping(self.tokenized_git_repo_dir)
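# A hypothetical usage sketch (the owning object and hash values are assumed,
# not shown in the original): once init_mapping has run, the two dicts
# translate commit hashes in both directions.
#
#   finder.init_mapping()
#   hg_rev = finder.tokenized_git_to_mercurial[tokenized_git_hash]
#   git_hash = finder.mercurial_to_tokenized_git[hg_rev]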
def main():
    # CLI args
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--nb-tasks", type=int, default=5, help="Number of tasks to create"
    )
    parser.add_argument(
        "--unique",
        choices=("day", "week"),
        help="Trigger only one task per day or week",
    )
    parser.add_argument(
        "--group", type=str, default=slugId(), help="Task group to create/update"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="List actions without triggering any new task",
    )
    parser.add_argument(
        "--codecov-token",
        type=str,
        default=os.environ.get("CODECOV_TOKEN"),
        help="Codecov access token",
    )
    args = parser.parse_args()

    # Download revision mapper database
    print("Downloading revision database...")
    download_mapfile()

    # List existing tags & commits
    print("Group", args.group)
    queue = taskcluster.get_service("queue")
    try:
        group = queue.listTaskGroup(args.group)
        commits = [
            task["task"]["payload"]["env"]["REVISION"]
            for task in group["tasks"]
            if task["status"]["state"] not in ("failed", "exception")
        ]
        print(
            "Found {} commits processed in task group {}".format(
                len(commits), args.group
            )
        )
    except Exception as e:
        print("Invalid task group: {}".format(e))
        commits = []

    # Trigger a task for each commit
    for commit in list_commits(
        args.codecov_token, args.nb_tasks, args.unique, commits
    ):
        print("Triggering commit {mercurial} from {timestamp}".format(**commit))
        if args.dry_run:
            print(">>> No trigger on dry run")
        else:
            out = trigger_task(args.group, commit)
            print(">>>", out["status"]["taskId"])
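# Minimal entry-point sketch (assumed; the excerpt does not show how main() is
# wired up):
if __name__ == "__main__":
    main()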
def find_bug_introducing_commits(cache_dir, git_repo_dir):
    mercurial_repo_dir = os.path.join(cache_dir, "mozilla-central")

    logger.info("Downloading Mercurial <-> git mapping file...")
    vcs_map.download_mapfile()

    logger.info(f"Cloning mercurial repository to {mercurial_repo_dir}...")
    repository.clone(mercurial_repo_dir)

    logger.info(f"Cloning git repository to {git_repo_dir}...")
    clone_gecko_dev(git_repo_dir)

    logger.info("Downloading previously found bug-introducing commits...")
    db.download_version(BUG_INTRODUCING_COMMITS_DB)
    if db.is_old_version(BUG_INTRODUCING_COMMITS_DB) or not os.path.exists(
        BUG_INTRODUCING_COMMITS_DB
    ):
        db.download(BUG_INTRODUCING_COMMITS_DB, force=True)

    logger.info("Getting previously found bug-introducing commits...")
    prev_bug_introducing_commits = list(db.read(BUG_INTRODUCING_COMMITS_DB))
    prev_bug_introducing_commits_nodes = {
        bug_introducing_commit["bug_fixing_mercurial_rev"]
        for bug_introducing_commit in prev_bug_introducing_commits
    }
    logger.info(f"Already classified {len(prev_bug_introducing_commits)} commits...")

    commits_to_ignore = get_commits_to_ignore(mercurial_repo_dir)
    git_hashes_to_ignore = {commit["git_rev"] for commit in commits_to_ignore}

    # Persist the ignore list so pydriller can consume it as a file.
    with open("git_hashes_to_ignore", "w") as f:
        f.writelines(f"{git_hash}\n" for git_hash in git_hashes_to_ignore)

    bug_fixing_commits = find_bug_fixing_commits()

    logger.info(f"{len(bug_fixing_commits)} commits to analyze")

    # Skip already found bug-introducing commits.
    bug_fixing_commits = [
        bug_fixing_commit
        for bug_fixing_commit in bug_fixing_commits
        if bug_fixing_commit["mercurial_rev"] not in prev_bug_introducing_commits_nodes
    ]
    logger.info(
        f"{len(bug_fixing_commits)} commits left to analyze after skipping already analyzed ones"
    )

    # Skip commits that are in the ignore list.
    bug_fixing_commits = [
        bug_fixing_commit
        for bug_fixing_commit in bug_fixing_commits
        if bug_fixing_commit["git_rev"] not in git_hashes_to_ignore
    ]
    logger.info(
        f"{len(bug_fixing_commits)} commits left to analyze after skipping the ones in the ignore list"
    )

    def _init(git_repo_dir):
        global GIT_REPO
        GIT_REPO = GitRepository(git_repo_dir)

    def find_bic(bug_fixing_commit):
        logger.info("Analyzing {}...".format(bug_fixing_commit["git_rev"]))

        commit = GIT_REPO.get_commit(bug_fixing_commit["git_rev"])

        # Skip huge changes, we'll likely be wrong with them.
        if len(commit.modifications) > MAX_MODIFICATION_NUMBER:
            return [None]

        bug_introducing_modifications = GIT_REPO.get_commits_last_modified_lines(
            commit, hashes_to_ignore_path=os.path.realpath("git_hashes_to_ignore")
        )
        logger.info(bug_introducing_modifications)

        bug_introducing_commits = []
        for bug_introducing_hashes in bug_introducing_modifications.values():
            for bug_introducing_hash in bug_introducing_hashes:
                bug_introducing_commits.append(
                    {
                        "bug_fixing_mercurial_rev": bug_fixing_commit["mercurial_rev"],
                        "bug_fixing_git_rev": bug_fixing_commit["git_rev"],
                        "bug_introducing_mercurial_rev": vcs_map.git_to_mercurial(
                            bug_introducing_hash
                        ),
                        "bug_introducing_git_rev": bug_introducing_hash,
                    }
                )

        # Add an empty result, just so that we don't reanalyze this again.
        if len(bug_introducing_commits) == 0:
            bug_introducing_commits.append(
                {
                    "bug_fixing_mercurial_rev": bug_fixing_commit["mercurial_rev"],
                    "bug_fixing_git_rev": bug_fixing_commit["git_rev"],
                    "bug_introducing_mercurial_rev": "",
                    "bug_introducing_git_rev": "",
                }
            )

        return bug_introducing_commits

    with concurrent.futures.ThreadPoolExecutor(
        initializer=_init, initargs=(git_repo_dir,), max_workers=os.cpu_count() + 1
    ) as executor:
        bug_introducing_commits = executor.map(find_bic, bug_fixing_commits)
        bug_introducing_commits = tqdm(
            bug_introducing_commits, total=len(bug_fixing_commits)
        )
        bug_introducing_commits = list(
            itertools.chain.from_iterable(bug_introducing_commits)
        )

    total_results_num = len(bug_introducing_commits)
    # Drop the [None] placeholders returned for commits that were too big.
    bug_introducing_commits = list(filter(None, bug_introducing_commits))
    logger.info(
        f"Skipped {total_results_num - len(bug_introducing_commits)} commits as they were too big"
    )

    db.append(BUG_INTRODUCING_COMMITS_DB, bug_introducing_commits)
    compress_file(BUG_INTRODUCING_COMMITS_DB)
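# A hypothetical invocation sketch (paths are assumptions; the original excerpt
# does not show the caller):
#
#   find_bug_introducing_commits(cache_dir="cache", git_repo_dir="gecko-dev")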