def get_commits_to_ignore(self):
    """Find new commits to ignore and append them to the ignored-commits DB.

    The ignored-commits DB is downloaded when `db.is_old_version` reports it
    as old or when it does not exist locally. Revisions following the last
    stored commit (up to a push-date cutoff) are then loaded, flagged through
    `repository.set_commits_to_ignore`, and every commit that ends up
    `ignored` or has `backedoutby` set is appended to the DB, which is then
    compressed with `zstd_compress`.

    Returns:
        list: the previously stored entries followed by the newly found ones.
        Each new entry is a dict with a "rev" key (the commit node) and a
        "type" key ("backedout" for backed-out commits, "" otherwise).
    """
    logger.info("Download previous commits to ignore...")
    if db.is_old_version(IGNORED_COMMITS_DB) or not db.exists(IGNORED_COMMITS_DB):
        db.download(IGNORED_COMMITS_DB, force=True)

    logger.info("Get previously classified commits...")
    prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
    logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

    # Resume right after the last stored commit, or from the very first
    # revision when nothing has been stored yet.
    last_known_rev = (
        prev_commits_to_ignore[-1]["rev"] if prev_commits_to_ignore else None
    )
    if last_known_rev is not None:
        rev_start = "children({})".format(last_known_rev)
    else:
        rev_start = 0

    # 2 days more than the end date, so we can know if a commit was backed-out.
    # We have to do this as recent commits might be missing in the mercurial <-> git map,
    # otherwise we could just use "tip".
    # NOTE(review): assumes `relativedelta` is dateutil's, where a lone
    # positional argument is taken as `dt1` and adds no offset at all; the
    # two-day margin therefore has to be passed by keyword.
    end_date = datetime.now() - RELATIVE_END_DATE + relativedelta(days=2)
    end_date_str = end_date.strftime("%Y-%m-%d")

    with hglib.open(self.mercurial_repo_dir) as hg:
        revs = repository.get_revs(
            hg, rev_start, "pushdate('{}')".format(end_date_str)
        )

    # Given that we use the pushdate, there might be cases where the starting
    # commit is returned too (e.g. if we rerun the task on the same day).
    # Drop everything up to and including it so it is not recorded twice.
    if last_known_rev is not None:
        for i, rev in enumerate(revs):
            if rev.decode("utf-8") == last_known_rev:
                revs = revs[i + 1:]
                break

    commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

    repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)

    commits_to_ignore = [
        {
            "rev": commit.node,
            "type": "backedout" if commit.backedoutby else "",
        }
        for commit in commits
        if commit.ignored or commit.backedoutby
    ]

    logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

    logger.info(
        "...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
        )
    )

    db.append(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)

    return prev_commits_to_ignore + commits_to_ignore
def get_commits_to_ignore(self):
    """Rebuild and upload the ignored-commits DB.

    Downloads the current DB, re-reads revisions starting from the children
    of the 3500th-from-last stored commit (or of the first stored commit when
    fewer than 3500 are stored, or from revision 0 when none are), flags them
    through `repository.set_commits_to_ignore`, and writes back the freshly
    found entries followed by the older stored entries that were not found
    again. The DB is then compressed and uploaded. Nothing is returned.
    """
    logger.info("Download previous commits to ignore...")
    db.download(IGNORED_COMMITS_DB)

    logger.info("Get previously classified commits...")
    prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
    logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

    rev_start = 0
    if prev_commits_to_ignore:
        # Go back over the most recent stored commits so that back-outs which
        # landed after the previous run are picked up as well.
        restart_index = -3500 if len(prev_commits_to_ignore) >= 3500 else 0
        restart_rev = prev_commits_to_ignore[restart_index]["rev"]
        rev_start = "children({})".format(restart_rev)

    with hglib.open(self.mercurial_repo_dir) as hg:
        revs = repository.get_revs(hg, rev_start)

    commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

    with hglib.open(self.mercurial_repo_dir) as hg:
        repository.set_commits_to_ignore(hg, self.mercurial_repo_dir, commits)

    # Commits from this author are always ignored, on top of whatever
    # `set_commits_to_ignore` decided.
    for commit in commits:
        commit.ignored |= commit.author_email == "*****@*****.**"

    commits_to_ignore = [
        {
            "rev": commit.node,
            "type": "backedout" if commit.backedoutby else "",
        }
        for commit in commits
        if commit.ignored or commit.backedoutby
    ]
    chosen_commits = {entry["rev"] for entry in commits_to_ignore}

    logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

    # Carry over stored entries (newest first) that were not re-discovered
    # above; the set also drops duplicates among the stored entries.
    for prev_commit in reversed(prev_commits_to_ignore):
        prev_rev = prev_commit["rev"]
        if prev_rev in chosen_commits:
            continue
        commits_to_ignore.append(prev_commit)
        chosen_commits.add(prev_rev)

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")

    backedout_count = sum(
        1 for entry in commits_to_ignore if entry["type"] == "backedout"
    )
    logger.info("...of which {} are backed-out".format(backedout_count))

    db.write(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)
    db.upload(IGNORED_COMMITS_DB)
def test_set_commits_to_ignore(tmpdir):
    """Check which commits `repository.set_commits_to_ignore` flags as ignored.

    A `.hg-annotate-ignore-revs` file listing three revisions is written into
    the temporary directory, six commits are built, and after the call exactly
    four of them are expected to have `ignored` set: the backout commit, the
    commit without a bug, the commit listed in the ignore file, and the commit
    whose description contains "ignore-this-changeset".
    """
    repo_dir = tmpdir.strpath

    ignore_revs_path = os.path.join(repo_dir, ".hg-annotate-ignore-revs")
    with open(ignore_revs_path, "w") as f:
        f.write("commit1\ncommit2\n8ba995b74e18334ab3707f27e9eb8f4e37ba3d29\n")

    def create_commit(node, desc, bug_id, backedoutby):
        # Only the four varying fields are parameters; everything else is the
        # same for every commit in this test.
        commit = repository.Commit(
            node=node,
            author="author",
            desc=desc,
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=bug_id,
            backedoutby=backedoutby,
            author_email="*****@*****.**",
            reviewers=("reviewer1", "reviewer2"),
        )
        return commit.set_files(["dom/file1.cpp"], {})

    # (node, desc, bug_id, backedoutby)
    commit_specs = [
        ("commit", "", 123, ""),
        ("commit_backout", "", 123, ""),
        ("commit_backedout", "", 123, "commit_backout"),
        ("commit_no_bug", "", None, ""),
        (
            "8ba995b74e18334ab3707f27e9eb8f4e37ba3d29",
            "commit in .hg-annotate-ignore-revs",
            123,
            "",
        ),
        ("commit_with_ignore_in_desc", "prova\nignore-this-changeset\n", 123, ""),
    ]
    commits = [create_commit(*spec) for spec in commit_specs]

    repository.set_commits_to_ignore(repo_dir, commits)

    ignored_commits = [commit for commit in commits if commit.ignored]
    assert len(ignored_commits) == 4

    ignored_nodes = {commit.node for commit in ignored_commits}
    assert ignored_nodes == {
        "commit_backout",
        "commit_no_bug",
        "8ba995b74e18334ab3707f27e9eb8f4e37ba3d29",
        "commit_with_ignore_in_desc",
    }
def get_commits_to_ignore(self):
    """Rebuild and upload the ignored-commits DB.

    Downloads the current DB, re-reads revisions starting from the children
    of the 3500th-from-last stored commit (or of the first stored commit when
    fewer than 3500 are stored, or from revision 0 when none are), trims
    trailing revisions that the mercurial <-> git map does not know yet, and
    flags the remaining commits through `repository.set_commits_to_ignore`.
    The freshly found entries, followed by the older stored entries that were
    not found again, are written back; the DB is then compressed and
    uploaded. Nothing is returned.
    """
    logger.info("Download previous commits to ignore...")
    db.download(IGNORED_COMMITS_DB)

    logger.info("Get previously classified commits...")
    prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
    logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

    rev_start = 0
    if prev_commits_to_ignore:
        # Go back over the most recent stored commits so that back-outs which
        # landed after the previous run are picked up as well.
        restart_index = -3500 if len(prev_commits_to_ignore) >= 3500 else 0
        restart_rev = prev_commits_to_ignore[restart_index]["rev"]
        rev_start = "children({})".format(restart_rev)

    with hglib.open(self.mercurial_repo_dir) as hg:
        revs = repository.get_revs(hg, rev_start)

    # Drop commits which are not yet present in the mercurial <-> git mapping:
    # pop from the end until the last revision can be mapped.
    while revs:
        try:
            vcs_map.mercurial_to_git(revs[-1].decode("ascii"))
        except Exception as e:
            # Only the "missing commit" failure means "not mapped yet";
            # anything else is a real error and must propagate.
            if not str(e).startswith("Missing mercurial commit in the VCS map"):
                raise
            revs.pop()
        else:
            break

    commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

    repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)

    commits_to_ignore = [
        {
            "rev": commit.node,
            "type": "backedout" if commit.backedoutby else "",
        }
        for commit in commits
        if commit.ignored or commit.backedoutby
    ]
    chosen_commits = {entry["rev"] for entry in commits_to_ignore}

    logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

    # Carry over stored entries (newest first) that were not re-discovered
    # above; the set also drops duplicates among the stored entries.
    for prev_commit in reversed(prev_commits_to_ignore):
        prev_rev = prev_commit["rev"]
        if prev_rev in chosen_commits:
            continue
        commits_to_ignore.append(prev_commit)
        chosen_commits.add(prev_rev)

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")

    backedout_count = sum(
        1 for entry in commits_to_ignore if entry["type"] == "backedout"
    )
    logger.info("...of which {} are backed-out".format(backedout_count))

    db.write(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)
    db.upload(IGNORED_COMMITS_DB)