def append_bug_fixing_commits(bug_id, type_):
    for commit in commit_map[bug_id]:
        bug_fixing_commits.append(
            {
                "mercurial_rev": commit["node"],
                "git_rev": vcs_map.mercurial_to_git(commit["node"]),
                "type": type_,
            }
        )
def append_commits_to_ignore(commits, type_):
    for commit in commits:
        commits_to_ignore.append(
            {
                "mercurial_rev": commit.node,
                "git_rev": vcs_map.mercurial_to_git(commit.node),
                "type": type_,
            }
        )
def classify_methods(self, commit):
    # Get commit hash from 4 months before the analysis time.
    # The method-level analyzer needs 4 months of history.
    stop_hash = None
    four_months_ago = datetime.utcnow() - relativedelta(months=4)
    # Use a distinct loop variable so the `commit` parameter, which is needed
    # again below to match functions, is not shadowed.
    for past_commit in repository.get_commits():
        if dateutil.parser.parse(past_commit["pushdate"]) >= four_months_ago:
            stop_hash = tuple(
                vcs_map.mercurial_to_git(self.git_repo_dir, [past_commit["node"]])
            )[0]
            break
    assert stop_hash is not None

    p = subprocess.run(
        ["git", "rev-list", "-n", "1", "HEAD"],
        check=True,
        capture_output=True,
        cwd=self.git_repo_dir,
    )

    start_hash = p.stdout.decode().strip()

    # Run the method-level analyzer.
    subprocess.run(
        [
            "python3",
            "tester.py",
            "--repo",
            self.git_repo_dir,
            "--start",
            start_hash,
            "--stop",
            stop_hash,
            "--output",
            os.path.abspath("method_level.csv"),
        ],
        cwd=self.method_defect_predictor_dir,
    )

    method_level_results = []
    try:
        with open("method_level.csv", "r") as f:
            reader = csv.DictReader(f)
            for item in reader:
                item["past_bugs"] = []
                method_level_results.append(item)
    except FileNotFoundError:
        # No methods were classified.
        pass

    for method_level_result in method_level_results:
        method_level_result_path = method_level_result["file_name"]
        if method_level_result_path not in self.past_bugs_by_function:
            continue

        for path, functions in commit["functions"].items():
            if method_level_result_path != path:
                continue

            for function_name, _, _ in functions:
                if function_name not in self.past_bugs_by_function[path]:
                    continue

                if method_level_result["method_name"].endswith(function_name):
                    method_level_result["past_bugs"] = list(
                        self.past_bugs_by_function[path][function_name]["bugs"]
                    )

    with open("method_level.json", "w") as f:
        json.dump(method_level_results, f)
def apply_phab(self, hg, phabricator_deployment, diff_id):
    if phabricator_deployment == PHAB_PROD:
        api_key = get_secret("PHABRICATOR_TOKEN")
        url = get_secret("PHABRICATOR_URL")
    else:
        api_key = get_secret("PHABRICATOR_DEV_TOKEN")
        url = get_secret("PHABRICATOR_DEV_URL")

    phabricator_api = PhabricatorAPI(api_key=api_key, url=url)

    # Get the stack of patches
    stack = phabricator_api.load_patches_stack(diff_id)
    assert len(stack) > 0, "No patches to apply"

    # Find the first unknown base revision
    needed_stack = []
    revisions = {}
    for patch in reversed(stack):
        needed_stack.insert(0, patch)

        # Stop as soon as a base revision is available
        if self.has_revision(hg, patch.base_revision):
            logger.info(
                f"Stopping at diff {patch.id} and revision {patch.base_revision}"
            )
            break

    if not needed_stack:
        logger.info("All the patches are already applied")
        return

    # Load all the diff revisions
    diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
    revisions = {
        diff["phid"]: phabricator_api.load_revision(
            rev_phid=diff["revisionPHID"], attachments={"reviewers": True}
        )
        for diff in diffs
    }

    # Update repo to base revision
    hg_base = needed_stack[0].base_revision
    if not self.has_revision(hg, hg_base):
        logger.warning("Missing base revision {} from Phabricator".format(hg_base))
        hg_base = "tip"

    if hg_base:
        hg.update(rev=hg_base, clean=True)
        logger.info(f"Updated repo to {hg_base}")

        if self.git_repo_dir and hg_base != "tip":
            try:
                self.git_base = tuple(
                    vcs_map.mercurial_to_git(self.git_repo_dir, [hg_base])
                )[0]
                subprocess.run(
                    ["git", "checkout", "-b", "analysis_branch", self.git_base],
                    check=True,
                    cwd=self.git_repo_dir,
                )
                logger.info(f"Updated git repo to {self.git_base}")
            except Exception as e:
                logger.info(f"Updating git repo to Mercurial {hg_base} failed: {e}")

    def load_user(phid):
        if phid.startswith("PHID-USER"):
            return phabricator_api.load_user(user_phid=phid)
        elif phid.startswith("PHID-PROJ"):
            # TODO: Support group reviewers somehow.
            logger.info(f"Skipping group reviewer {phid}")
        else:
            raise Exception(f"Unsupported reviewer {phid}")

    for patch in needed_stack:
        revision = revisions[patch.phid]

        message = "{}\n\n{}".format(
            revision["fields"]["title"], revision["fields"]["summary"]
        )

        author_name = None
        author_email = None

        if patch.commits:
            author_name = patch.commits[0]["author"]["name"]
            author_email = patch.commits[0]["author"]["email"]

        if author_name is None:
            author = load_user(revision["fields"]["authorPHID"])
            author_name = author["fields"]["realName"]
            # XXX: Figure out a way to know the email address of the author.
            author_email = author["fields"]["username"]

        reviewers = list(
            filter(
                None,
                (
                    load_user(reviewer["reviewerPHID"])
                    for reviewer in revision["attachments"]["reviewers"]["reviewers"]
                ),
            )
        )
        reviewers = set(reviewer["fields"]["username"] for reviewer in reviewers)

        if len(reviewers):
            message = replace_reviewers(message, reviewers)

        logger.info(
            f"Applying {patch.phid} from revision {revision['id']}: {message}"
        )

        hg.import_(
            patches=io.BytesIO(patch.patch.encode("utf-8")),
            message=message.encode("utf-8"),
            user=f"{author_name} <{author_email}>".encode("utf-8"),
        )

        if self.git_repo_dir:
            patch_proc = subprocess.Popen(
                ["patch", "-p1", "--no-backup-if-mismatch", "--force"],
                stdin=subprocess.PIPE,
                cwd=self.git_repo_dir,
            )
            patch_proc.communicate(patch.patch.encode("utf-8"))
            assert patch_proc.returncode == 0, "Failed to apply patch"

            subprocess.run(
                [
                    "git",
                    "-c",
                    f"user.name={author_name}",
                    "-c",
                    f"user.email={author_email}",
                    "commit",
                    "-am",
                    message,
                ],
                check=True,
                cwd=self.git_repo_dir,
            )
def mercurial_to_git(rev):
    # Map a single Mercurial revision to the corresponding git revision,
    # using the tokenized repository mapping when requested.
    if tokenized:
        return self.mercurial_to_tokenized_git[rev]
    else:
        return vcs_map.mercurial_to_git(rev)
def apply_phab(self, hg, diff_id):
    def has_revision(revision):
        if not revision:
            return False
        try:
            hg.identify(revision)
            return True
        except hglib.error.CommandError:
            return False

    phabricator_api = PhabricatorAPI(
        api_key=get_secret("PHABRICATOR_TOKEN"), url=get_secret("PHABRICATOR_URL")
    )

    # Get the stack of patches
    stack = phabricator_api.load_patches_stack(diff_id)
    assert len(stack) > 0, "No patches to apply"

    # Find the first unknown base revision
    needed_stack = []
    revisions = {}
    for patch in reversed(stack):
        needed_stack.insert(0, patch)

        # Stop as soon as a base revision is available
        if has_revision(patch.base_revision):
            logger.info(
                f"Stopping at diff {patch.id} and revision {patch.base_revision}"
            )
            break

    if not needed_stack:
        logger.info("All the patches are already applied")
        return

    # Load all the diff revisions
    diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
    revisions = {
        diff["phid"]: phabricator_api.load_revision(
            rev_phid=diff["revisionPHID"], attachments={"reviewers": True}
        )
        for diff in diffs
    }

    # Update repo to base revision
    hg_base = needed_stack[0].base_revision
    if not has_revision(hg_base):
        logger.warning("Missing base revision {} from Phabricator".format(hg_base))
        hg_base = "tip"

    if hg_base:
        hg.update(rev=hg_base, clean=True)
        logger.info(f"Updated repo to {hg_base}")

        try:
            self.git_base = vcs_map.mercurial_to_git(hg_base)
            subprocess.run(
                ["git", "checkout", "-b", "analysis_branch", self.git_base],
                check=True,
                cwd=self.git_repo_dir,
            )
            logger.info(f"Updated git repo to {self.git_base}")
        except Exception as e:
            logger.info(f"Updating git repo to Mercurial {hg_base} failed: {e}")

    def load_user(phid):
        if phid.startswith("PHID-USER"):
            return phabricator_api.load_user(user_phid=phid)
        elif phid.startswith("PHID-PROJ"):
            # TODO: Support group reviewers somehow.
            logger.info(f"Skipping group reviewer {phid}")
        else:
            raise Exception(f"Unsupported reviewer {phid}")

    for patch in needed_stack:
        revision = revisions[patch.phid]

        message = "{}\n\n{}".format(
            revision["fields"]["title"], revision["fields"]["summary"]
        )

        author_name = None
        author_email = None

        if patch.commits:
            author_name = patch.commits[0]["author"]["name"]
            author_email = patch.commits[0]["author"]["email"]

        if author_name is None:
            author = load_user(revision["fields"]["authorPHID"])
            author_name = author["fields"]["realName"]
            # XXX: Figure out a way to know the email address of the author.
            author_email = author["fields"]["username"]

        reviewers = list(
            filter(
                None,
                (
                    load_user(reviewer["reviewerPHID"])
                    for reviewer in revision["attachments"]["reviewers"]["reviewers"]
                ),
            )
        )
        reviewers = set(reviewer["fields"]["username"] for reviewer in reviewers)

        if len(reviewers):
            message = replace_reviewers(message, reviewers)

        logger.info(
            f"Applying {patch.phid} from revision {revision['id']}: {message}"
        )

        hg.import_(
            patches=io.BytesIO(patch.patch.encode("utf-8")),
            message=message.encode("utf-8"),
            user=f"{author_name} <{author_email}>".encode("utf-8"),
        )

        with tempfile.TemporaryDirectory() as tmpdirname:
            temp_file = os.path.join(tmpdirname, "temp.patch")

            with open(temp_file, "w") as f:
                f.write(patch.patch)

            subprocess.run(
                ["git", "apply", "--3way", temp_file],
                check=True,
                cwd=self.git_repo_dir,
            )

        subprocess.run(
            [
                "git",
                "-c",
                f"user.name={author_name}",
                "-c",
                f"user.email={author_email}",
                "commit",
                "-am",
                message,
            ],
            check=True,
            cwd=self.git_repo_dir,
        )
def mercurial_to_git(revs):
    # Lazily map an iterable of Mercurial revisions to the corresponding git
    # revisions, using the tokenized repository mapping when requested.
    # Both branches yield, so the function behaves as a generator either way.
    if tokenized:
        yield from (self.mercurial_to_tokenized_git[rev] for rev in revs)
    else:
        yield from vcs_map.mercurial_to_git(repo_dir, revs)
def apply_phab(self, hg, diff_id):
    def has_revision(revision):
        if not revision:
            return False
        try:
            hg.identify(revision)
            return True
        except hglib.error.CommandError:
            return False

    phabricator_api = PhabricatorAPI(
        api_key=get_secret("PHABRICATOR_TOKEN"), url=get_secret("PHABRICATOR_URL")
    )

    # Get the stack of patches
    stack = phabricator_api.load_patches_stack(diff_id)
    assert len(stack) > 0, "No patches to apply"

    # Find the first unknown base revision
    needed_stack = []
    revisions = {}
    for patch in reversed(stack):
        needed_stack.insert(0, patch)

        # Stop as soon as a base revision is available
        if has_revision(patch.base_revision):
            logger.info(
                f"Stopping at diff {patch.id} and revision {patch.base_revision}"
            )
            break

    if not needed_stack:
        logger.info("All the patches are already applied")
        return

    # Load all the diff revisions
    diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
    revisions = {
        diff["phid"]: phabricator_api.load_revision(rev_phid=diff["revisionPHID"])
        for diff in diffs
    }

    # Update repo to base revision
    hg_base = needed_stack[0].base_revision
    if not has_revision(hg_base):
        logger.warning("Missing base revision {} from Phabricator".format(hg_base))
        hg_base = "tip"

    if hg_base:
        hg.update(rev=hg_base, clean=True)
        logger.info(f"Updated repo to {hg_base}")

        try:
            self.git_base = vcs_map.mercurial_to_git(hg_base)
            subprocess.run(
                ["git", "checkout", "-b", "analysis_branch", self.git_base],
                check=True,
                cwd=self.git_repo_dir,
            )
            logger.info(f"Updated git repo to {self.git_base}")
        except Exception as e:
            logger.info(f"Updating git repo to Mercurial {hg_base} failed: {e}")

    for patch in needed_stack:
        revision = revisions[patch.phid]

        if patch.commits:
            message = patch.commits[0]["message"]
            author_name = patch.commits[0]["author"]["name"]
            author_email = patch.commits[0]["author"]["email"]
        else:
            message = revision["fields"]["title"]
            author_name = "bugbug"
            author_email = "*****@*****.**"

        logger.info(
            f"Applying {patch.phid} from revision {revision['id']}: {message}"
        )

        hg.import_(
            patches=io.BytesIO(patch.patch.encode("utf-8")),
            message=message.encode("utf-8"),
            user=f"{author_name} <{author_email}>".encode("utf-8"),
        )

        with tempfile.TemporaryDirectory() as tmpdirname:
            temp_file = os.path.join(tmpdirname, "temp.patch")

            with open(temp_file, "w") as f:
                f.write(patch.patch)

            subprocess.run(
                ["git", "apply", "--3way", temp_file],
                check=True,
                cwd=self.git_repo_dir,
            )

        subprocess.run(
            [
                "git",
                "-c",
                f"user.name={author_name}",
                "-c",
                f"user.email={author_email}",
                "commit",
                "-am",
                message,
            ],
            check=True,
            cwd=self.git_repo_dir,
        )
def get_commits_to_ignore(self):
    logger.info("Download previous commits to ignore...")
    db.download(IGNORED_COMMITS_DB)

    logger.info("Get previously classified commits...")
    prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
    logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

    # When we already have some analyzed commits, re-analyze the last 3500 to make sure
    # we didn't miss back-outs that happened since the last analysis.
    if len(prev_commits_to_ignore) > 0:
        first_commit_to_reanalyze = (
            -3500 if len(prev_commits_to_ignore) >= 3500 else 0
        )
        rev_start = "children({})".format(
            prev_commits_to_ignore[first_commit_to_reanalyze]["rev"]
        )
    else:
        rev_start = 0

    with hglib.open(self.mercurial_repo_dir) as hg:
        revs = repository.get_revs(hg, rev_start)

    # Drop commits which are not yet present in the mercurial <-> git mapping.
    while len(revs) > 0:
        try:
            vcs_map.mercurial_to_git(revs[-1].decode("ascii"))
            break
        except Exception as e:
            if not str(e).startswith("Missing mercurial commit in the VCS map"):
                raise

            revs.pop()

    commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

    repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)

    chosen_commits = set()
    commits_to_ignore = []
    for commit in commits:
        if commit.ignored or commit.backedoutby:
            commits_to_ignore.append(
                {
                    "rev": commit.node,
                    "type": "backedout" if commit.backedoutby else "",
                }
            )
            chosen_commits.add(commit.node)

    logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

    for prev_commit in prev_commits_to_ignore[::-1]:
        if prev_commit["rev"] not in chosen_commits:
            commits_to_ignore.append(prev_commit)
            chosen_commits.add(prev_commit["rev"])

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")
    logger.info(
        "...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
        )
    )

    db.write(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)
    db.upload(IGNORED_COMMITS_DB)