Example #1
def test_get_revs(fake_hg_repo):
    hg, local, remote = fake_hg_repo

    add_file(hg, local, "file1", "1\n2\n3\n4\n5\n6\n7\n")
    revision1 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 1, "There should be one revision now"
    assert revs[0].decode("ascii") == revision1

    add_file(hg, local, "file2", "1\n2\n3\n4\n5\n6\n7\n")
    revision2 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 2, "There should be two revisions now"
    assert revs[0].decode("ascii") == revision1
    assert revs[1].decode("ascii") == revision2

    add_file(hg, local, "file3", "1\n2\n3\n4\n5\n6\n7\n")
    revision3 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 3, "There should be three revisions now"
    assert revs[0].decode("ascii") == revision1
    assert revs[1].decode("ascii") == revision2
    assert revs[2].decode("ascii") == revision3

    revs = repository.get_revs(hg, revision2)

    assert len(revs) == 2, "There should be two revisions after the first"
    assert revs[0].decode("ascii") == revision2
    assert revs[1].decode("ascii") == revision3
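All of these examples exercise repository.get_revs from Mozilla's bugbug project: it takes an hglib client plus optional start/end revisions and returns the matching changeset hashes as byte strings. A minimal sketch of a compatible implementation, assuming it simply wraps `hg log` (the exact flags and template are assumptions based on the observed behavior, not bugbug's actual code):

import hglib

def get_revs(hg, rev_start=0, rev_end="tip"):
    # Ask `hg log` for one changeset hash per line over the given range;
    # the no_merges flag and template are assumptions, not confirmed source.
    args = hglib.util.cmdbuilder(
        b"log",
        template=b"{node}\n",
        no_merges=True,
        rev=f"{rev_start}:{rev_end}".encode("ascii"),
    )
    return hg.rawcommand(args).splitlines()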
Example #2
    def get_commits_to_ignore(self):
        logger.info("Download previous commits to ignore...")
        if db.is_old_version(IGNORED_COMMITS_DB) or not db.exists(IGNORED_COMMITS_DB):
            db.download(IGNORED_COMMITS_DB, force=True)

        logger.info("Get previously classified commits...")
        prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
        logger.info(
            f"Already found {len(prev_commits_to_ignore)} commits to ignore..."
        )

        if len(prev_commits_to_ignore) > 0:
            rev_start = "children({})".format(
                prev_commits_to_ignore[-1]["rev"])
        else:
            rev_start = 0

        # Look 2 days past the end date, so we can tell whether a commit was backed out.
        # We have to do this as recent commits might be missing in the mercurial <-> git map,
        # otherwise we could just use "tip".
        end_date = datetime.now() - RELATIVE_END_DATE + relativedelta(days=2)
        with hglib.open(self.mercurial_repo_dir) as hg:
            revs = repository.get_revs(
                hg, rev_start,
                "pushdate('{}')".format(end_date.strftime("%Y-%m-%d")))

        # Given that we use the pushdate, there might be cases where the starting commit is returned too (e.g. if we rerun the task on the same day).
        if len(prev_commits_to_ignore) > 0:
            found_prev = -1
            for i, rev in enumerate(revs):
                if rev.decode("utf-8") == prev_commits_to_ignore[-1]["rev"]:
                    found_prev = i
                    break
            revs = revs[found_prev + 1:]

        commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

        repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)
        commits_to_ignore = []

        for commit in commits:
            if commit.ignored or commit.backedoutby:
                commits_to_ignore.append({
                    "rev": commit.node,
                    "type": "backedout" if commit.backedoutby else "",
                })

        logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

        logger.info("...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore
                if commit["type"] == "backedout")))

        db.append(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)

        return prev_commits_to_ignore + commits_to_ignore
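The two positional arguments built above are Mercurial revset expressions: "children(<node>)" selects everything after the last previously classified commit, and "pushdate('YYYY-MM-DD')" bounds the range by push date (a predicate provided by Mozilla's pushlog extension, not stock Mercurial). A standalone illustration with plain hglib, where the repository path and changeset hash are placeholders:

import hglib

# Hypothetical standalone equivalent of the range built above; pushdate()
# only resolves in repositories served with the pushlog extension.
with hglib.open("/path/to/mozilla-central") as hg:
    revs = hg.log(revrange=b"children(abcdef012345):pushdate('2020-01-01')")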
Example #3
    def get_commits_to_ignore(self):
        logger.info("Download previous commits to ignore...")
        db.download(IGNORED_COMMITS_DB)

        logger.info("Get previously classified commits...")
        prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
        logger.info(
            f"Already found {len(prev_commits_to_ignore)} commits to ignore..."
        )

        # When we already have some analyzed commits, re-analyze the last 3500 to make sure
        # we didn't miss back-outs that happened since the last analysis.
        if len(prev_commits_to_ignore) > 0:
            first_commit_to_reanalyze = (
                -3500 if len(prev_commits_to_ignore) >= 3500 else 0)
            rev_start = "children({})".format(
                prev_commits_to_ignore[first_commit_to_reanalyze]["rev"])
        else:
            rev_start = 0

        with hglib.open(self.mercurial_repo_dir) as hg:
            revs = repository.get_revs(hg, rev_start)

        commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

        with hglib.open(self.mercurial_repo_dir) as hg:
            repository.set_commits_to_ignore(hg, self.mercurial_repo_dir,
                                             commits)

        for commit in commits:
            commit.ignored |= commit.author_email == "*****@*****.**"

        chosen_commits = set()
        commits_to_ignore = []
        for commit in commits:
            if commit.ignored or commit.backedoutby:
                commits_to_ignore.append({
                    "rev": commit.node,
                    "type": "backedout" if commit.backedoutby else "",
                })
                chosen_commits.add(commit.node)

        logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

        for prev_commit in prev_commits_to_ignore[::-1]:
            if prev_commit["rev"] not in chosen_commits:
                commits_to_ignore.append(prev_commit)
                chosen_commits.add(prev_commit["rev"])

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        logger.info("...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore
                if commit["type"] == "backedout")))

        db.write(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)
        db.upload(IGNORED_COMMITS_DB)
Example #4
    def retrieve_commits(self, limit):
        repository.clone(self.repo_dir)

        if limit:
            # Mercurial revset supports negative integers starting from tip
            rev_start = -limit
        else:
            db.download(repository.COMMITS_DB, support_files_too=True)

            rev_start = 0
            for commit in repository.get_commits():
                rev_start = f"children({commit['node']})"

        with hglib.open(self.repo_dir) as hg:
            revs = repository.get_revs(hg, rev_start)

        chunk_size = 70000

        for i in range(0, len(revs), chunk_size):
            repository.download_commits(self.repo_dir,
                                        revs=revs[i:(i + chunk_size)])

        logger.info("commit data extracted from repository")

        # Some commits that were already in the DB from the previous run might need
        # to be updated (e.g. coverage information).
        repository.update_commits()

        zstd_compress(repository.COMMITS_DB)
        create_tar_zst(os.path.join("data", repository.COMMIT_EXPERIENCES_DB))
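retrieve_commits downloads commit data in 70000-revision chunks; the slicing loop above generalizes to a small helper like this (a sketch, not part of bugbug):

def chunks(seq, size):
    # Yield successive size-length slices of seq; the last one may be shorter.
    for i in range(0, len(seq), size):
        yield seq[i : i + size]

# Usage mirroring the loop above:
# for chunk in chunks(revs, 70000):
#     repository.download_commits(self.repo_dir, revs=chunk)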
Example #5
def go(repo_dir):
    with hglib.open(repo_dir) as hg:
        revs = repository.get_revs(hg, -1000, -500)
        commits = repository.hg_log(hg, revs)
        backouts = list(
            set(commit.backedoutby for commit in commits
                if commit.ever_backedout))
        backedouts = list(
            set(commit.node for commit in commits if commit.ever_backedout))

    likely_label_count = 0
    possible_label_count = 0
    likely_group_count = 0
    possible_group_count = 0

    backout_regressions = {}

    for backout in tqdm(backouts):
        p = Push(backout)

        label_regressions = p.get_regressions("label")
        likely_label_count += len(p.get_likely_regressions("label"))
        possible_label_count += len(p.get_possible_regressions("label"))

        group_regressions = p.get_regressions("group")
        likely_group_count += len(p.get_likely_regressions("group"))
        possible_group_count += len(p.get_possible_regressions("group"))

        if len(label_regressions) > 0 or len(group_regressions) > 0:
            backout_regressions[backout] = {
                "label": label_regressions,
                "group": group_regressions,
            }

    print(f"Likely labels for backouts: {likely_label_count}")
    print(f"Likely groups for backouts: {likely_group_count}")
    print(f"Possible labels for backouts: {possible_label_count}")
    print(f"Possible groups for backouts: {possible_group_count}")

    backedout_regressions = {}

    for backedout in tqdm(backedouts):
        p = Push(backedout)

        label_regressions = p.get_regressions("label")
        group_regressions = p.get_regressions("group")

        if (len(p.get_likely_regressions("label")) == 0
                or len(p.get_likely_regressions("group")) == 0):
            backedout_regressions[backedout] = {
                "label": label_regressions,
                "group": group_regressions,
            }

    with open("backout_regressions.json", "w") as f:
        json.dump(backout_regressions, f)

    with open("backedout_regressions.json", "w") as f:
        json.dump(backedout_regressions, f)
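The Push objects above expose get_regressions, get_likely_regressions, and get_possible_regressions, each taking a "label" or "group" granularity; this matches mozci's Push API, which is presumably where the import comes from (the snippet does not show it). A minimal single-push probe under that assumption:

from mozci.push import Push  # assumed source of the Push class used above

p = Push("0123456789ab")  # placeholder changeset hash of a backout
print(len(p.get_likely_regressions("label")), "likely label regressions")
print(len(p.get_likely_regressions("group")), "likely group regressions")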
Example #6
def test_hg_modified_files(fake_hg_repo):
    hg, local, remote = fake_hg_repo

    add_file(hg, local, "f1", "1\n2\n3\n4\n5\n6\n7\n")
    revision1 = commit(hg, date=datetime(1991, 4, 16, tzinfo=timezone.utc))

    add_file(hg, local, "f2", "1\n2\n3\n4\n5\n6\n7\n")
    revision2 = commit(hg, "Bug 123 - Prova. r=moz,rev2")

    hg.copy(
        bytes(os.path.join(local, "f2"), "ascii"),
        bytes(os.path.join(local, "f2copy"), "ascii"),
    )
    revision3 = commit(hg, "Copy")

    hg.move(
        bytes(os.path.join(local, "f2copy"), "ascii"),
        bytes(os.path.join(local, "f2copymove"), "ascii"),
    )
    revision4 = commit(hg, "Move")

    hg.push(dest=bytes(remote, "ascii"))
    revs = repository.get_revs(hg, revision1)
    commits = repository.hg_log(hg, revs)

    repository.path_to_component = {}

    for c in commits:
        repository.hg_modified_files(hg, c)

    assert commits[0].node == revision1
    assert commits[0].files == ["f1"]
    assert commits[0].file_copies == {}

    assert commits[1].node == revision2
    assert commits[1].files == ["f2"]
    assert commits[1].file_copies == {}

    assert commits[2].node == revision3
    assert commits[2].files == ["f2copy"]
    assert commits[2].file_copies == {"f2": "f2copy"}

    assert commits[3].node == revision4
    assert commits[3].files == ["f2copy", "f2copymove"]
    assert commits[3].file_copies == {"f2copy": "f2copymove"}
Example #7
def get_commits_to_ignore(repo_dir):
    commits_to_ignore = []

    # TODO: Make repository analyze all commits, even those to ignore, but add a field "ignore" or a function should_ignore that analyzes the commit data. This way we don't have to clone the Mercurial repository in this script.
    with hglib.open(repo_dir) as hg:
        revs = repository.get_revs(hg, -10000)

    commits = repository.hg_log_multi(repo_dir, revs)

    def append_commits_to_ignore(commits, type_):
        for commit in commits:
            commits_to_ignore.append(
                {
                    "mercurial_rev": commit.node,
                    "git_rev": vcs_map.mercurial_to_git(commit.node),
                    "type": type_,
                }
            )

    append_commits_to_ignore(
        list(repository.get_commits_to_ignore(repo_dir, commits)), ""
    )

    logger.info(
        f"{len(commits_to_ignore)} commits to ignore (excluding backed-out commits)"
    )

    append_commits_to_ignore(
        (commit for commit in commits if commit.backedoutby), "backedout"
    )

    logger.info(
        f"{len(commits_to_ignore)} commits to ignore (including backed-out commits)"
    )

    with open("commits_to_ignore.csv", "w") as f:
        writer = csv.DictWriter(f, fieldnames=["mercurial_rev", "git_rev", "type"])
        writer.writeheader()
        writer.writerows(commits_to_ignore)

    return commits_to_ignore
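Since the results are written with csv.DictWriter, they can be read back symmetrically; a small sanity-check sketch:

import csv

with open("commits_to_ignore.csv") as f:
    rows = list(csv.DictReader(f))

# Every row carries the three fields written above.
assert all(set(row) == {"mercurial_rev", "git_rev", "type"} for row in rows)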
Example #8
def test_hg_log(fake_hg_repo):
    hg, local, remote = fake_hg_repo

    add_file(hg, local, "file1", "1\n2\n3\n4\n5\n6\n7\n")
    revision1 = commit(hg, date=datetime(1991, 4, 16, tzinfo=timezone.utc))

    first_push_date = datetime.utcnow()
    hg.push(dest=bytes(remote, "ascii"))

    add_file(hg, local, "file2", "1\n2\n3\n4\n5\n6\n7\n")
    revision2 = commit(hg, "Bug 123 - Prova. r=moz,rev2")

    hg.copy(
        bytes(os.path.join(local, "file2"), "ascii"),
        bytes(os.path.join(local, "file2copy"), "ascii"),
    )
    revision3 = commit(hg)

    hg.move(
        bytes(os.path.join(local, "file2copy"), "ascii"),
        bytes(os.path.join(local, "file2copymove"), "ascii"),
    )
    revision4 = commit(hg)

    hg.backout(
        rev=revision4,
        message=f"Backout {revision4[:12]}",
        user="******",
        date=datetime(2019, 4, 16, tzinfo=timezone.utc),
    )
    revision5 = hg.log(limit=1)[0][1].decode("ascii")

    # Wait one second, to have a different pushdate.
    time.sleep(1)

    second_push_date = datetime.utcnow()
    hg.push(dest=bytes(remote, "ascii"))

    add_file(hg, local, "file3", "1\n2\n3\n4\n5\n6\n7\n")
    revision6 = commit(hg)

    copy_pushlog_database(remote, local)

    revs = repository.get_revs(hg)

    # Wait one second, to have a different pushdate.
    time.sleep(1)

    hg_log_date = datetime.utcnow()

    commits = repository.hg_log(hg, revs)
    assert len(commits) == 6, "hg log should return six commits"

    assert commits[0].node == revision1
    assert commits[0].author == "Moz Illa <*****@*****.**>"
    assert commits[0].desc == "Commit A file1"
    assert commits[0].date == datetime(1991, 4, 16)
    assert (first_push_date - relativedelta(seconds=1) <= commits[0].pushdate
            <= first_push_date + relativedelta(seconds=1))
    assert commits[0].bug_id is None
    assert commits[0].backedoutby == ""
    assert commits[0].author_email == "*****@*****.**"
    assert commits[0].reviewers == tuple()

    assert commits[1].node == revision2
    assert commits[1].author == "Moz Illa <*****@*****.**>"
    assert commits[1].desc == "Bug 123 - Prova. r=moz,rev2"
    assert commits[1].date == datetime(2019, 4, 16)
    assert (second_push_date - relativedelta(seconds=1) <= commits[1].pushdate
            <= second_push_date + relativedelta(seconds=1))
    assert commits[1].bug_id == 123
    assert commits[1].backedoutby == ""
    assert commits[1].author_email == "*****@*****.**"
    assert set(commits[1].reviewers) == {"moz", "rev2"}

    assert commits[2].node == revision3
    assert commits[2].author == "Moz Illa <*****@*****.**>"
    assert commits[2].desc == "Commit A file2copy"
    assert commits[2].date == datetime(2019, 4, 16)
    assert (second_push_date - relativedelta(seconds=1) <= commits[2].pushdate
            <= second_push_date + relativedelta(seconds=1))
    assert commits[2].bug_id is None
    assert commits[2].backedoutby == ""
    assert commits[2].author_email == "*****@*****.**"
    assert commits[2].reviewers == tuple()

    assert commits[3].node == revision4
    assert commits[3].author == "Moz Illa <*****@*****.**>"
    assert commits[3].desc == "Commit A file2copymove R file2copy"
    assert commits[3].date == datetime(2019, 4, 16)
    assert (second_push_date - relativedelta(seconds=1) <= commits[3].pushdate
            <= second_push_date + relativedelta(seconds=1))
    assert commits[3].bug_id is None
    assert commits[3].backedoutby == revision5
    assert commits[3].author_email == "*****@*****.**"
    assert commits[3].reviewers == tuple()

    assert commits[4].node == revision5
    assert commits[4].author == "sheriff"
    assert commits[4].desc == f"Backout {revision4[:12]}"
    assert commits[4].date == datetime(2019, 4, 16)
    assert (second_push_date - relativedelta(seconds=1) <= commits[4].pushdate
            <= second_push_date + relativedelta(seconds=1))
    assert commits[4].bug_id is None
    assert commits[4].backedoutby == ""
    assert commits[4].author_email == "sheriff"
    assert commits[4].reviewers == tuple()

    assert commits[5].node == revision6
    assert commits[5].author == "Moz Illa <*****@*****.**>"
    assert commits[5].desc == "Commit A file3"
    assert commits[5].date == datetime(2019, 4, 16)
    assert (hg_log_date - relativedelta(seconds=1) <= commits[5].pushdate <=
            hg_log_date + relativedelta(seconds=1))
    assert commits[5].bug_id is None
    assert commits[5].backedoutby == ""
    assert commits[5].author_email == "*****@*****.**"
    assert commits[5].reviewers == tuple()

    commits = repository.hg_log(hg, [revs[1], revs[3]])
    assert len(commits) == 3, "hg log should return three commits"
    assert commits[0].node == revision2
    assert commits[1].node == revision3
    assert commits[2].node == revision4
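The pushdate assertions above all compare a timestamp against a captured reference with a ±1 second tolerance; the repeated three-way comparison can be factored into a small predicate (a sketch using the same relativedelta the test already relies on):

from dateutil.relativedelta import relativedelta

def close_to(dt, target, seconds=1):
    # True when dt falls within ±seconds of target, as in the asserts above.
    return (target - relativedelta(seconds=seconds)
            <= dt
            <= target + relativedelta(seconds=seconds))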
Example #9
def boot_worker() -> None:
    # Clone autoland
    def clone_autoland() -> None:
        logger.info(f"Cloning autoland in {REPO_DIR}...")
        repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

    def extract_past_failures_label() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_LABEL_DB)
            )
            logger.info("Label-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Label-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_failing_together_label() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.FAILING_TOGETHER_LABEL_DB)
            )
            logger.info("Failing together label DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Failing together label DB not extracted, but missing models are allowed."
            )

    def extract_failing_together_config_group() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.FAILING_TOGETHER_CONFIG_GROUP_DB)
            )
            logger.info("Failing together config/group DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Failing together config/group DB not extracted, but missing models are allowed."
            )

    def extract_past_failures_group() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_GROUP_DB)
            )
            logger.info("Group-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Group-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_touched_together() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.TOUCHED_TOGETHER_DB)
            )
            logger.info("Touched together DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Touched together DB not extracted, but missing models are allowed."
            )

    def extract_commits() -> bool:
        try:
            utils.extract_file(f"{repository.COMMITS_DB}.zst")
            logger.info("Commits DB extracted.")
            return True
        except FileNotFoundError:
            logger.info("Commits DB not extracted, but missing models are allowed.")
            assert ALLOW_MISSING_MODELS
            return False

    def extract_commit_experiences() -> None:
        try:
            utils.extract_file(os.path.join("data", repository.COMMIT_EXPERIENCES_DB))
            logger.info("Commit experiences DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Commit experiences DB not extracted, but missing models are allowed."
            )

    @tenacity.retry(
        stop=tenacity.stop_after_attempt(7),
        wait=tenacity.wait_exponential(multiplier=1, min=1, max=8),
    )
    def retrieve_schedulable_tasks() -> None:
        r = requests.get(
            "https://hg.mozilla.org/integration/autoland/json-pushes?version=2&tipsonly=1"
        )
        r.raise_for_status()
        revs = [
            push_obj["changesets"][0]
            for push_id, push_obj in r.json()["pushes"].items()
        ]

        logger.info(f"Retrieving known tasks from {revs}")

        # Store the list of tasks from the latest autoland pushes in a file.
        # We use more than one push to protect ourselves from broken decision tasks.
        known_tasks = set()
        for rev in revs:
            r = requests.get(
                f"https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.autoland.revision.{rev}.taskgraph.decision/artifacts/public/target-tasks.json"
            )
            if r.ok:
                known_tasks.update(r.json())

        logger.info(f"Retrieved {len(known_tasks)} tasks")

        assert len(known_tasks) > 0

        with open("known_tasks", "w") as f:
            f.write("\n".join(known_tasks))

    with concurrent.futures.ThreadPoolExecutor() as executor:
        clone_autoland_future = executor.submit(clone_autoland)

        retrieve_schedulable_tasks_future = executor.submit(retrieve_schedulable_tasks)

        commits_db_extracted = extract_commits()
        extract_commit_experiences()
        extract_touched_together()
        extract_past_failures_label()
        extract_past_failures_group()
        extract_failing_together_label()
        extract_failing_together_config_group()

        if commits_db_extracted:
            # Update the commits DB.
            logger.info("Browsing all commits...")
            nodes = collections.deque(
                (commit["node"] for commit in repository.get_commits()), maxlen=4096
            )
            nodes.reverse()
            logger.info("All commits browsed.")

            # Wait for the repository to be cloned, as it's required by repository.download_commits.
            logger.info("Waiting for autoland to be cloned...")
            clone_autoland_future.result()

            with hglib.open(REPO_DIR) as hg:
                # Try using nodes backwards, in case we have some node that was on central at the time
                # we mined commits, but is not yet on autoland.
                for node in nodes:
                    try:
                        revs = repository.get_revs(hg, rev_start=f"children({node})")
                        break
                    except hglib.error.CommandError as e:
                        if b"abort: unknown revision" not in e.err:
                            raise

            logger.info("Updating commits DB...")
            commits = repository.download_commits(
                REPO_DIR, revs=revs, use_single_process=True
            )
            logger.info("Commits DB updated.")

            logger.info("Updating touched together DB...")
            if len(commits) > 0:
                # Update the touched together DB.
                update_touched_together_gen = test_scheduling.update_touched_together()
                next(update_touched_together_gen)

                update_touched_together_gen.send(commits[-1]["node"])

                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass
            logger.info("Touched together DB updated.")

        # Wait for the list of schedulable tasks to be downloaded and written to disk.
        retrieve_schedulable_tasks_future.result()

    logger.info("Worker boot done")
Example #10
    def apply_phab(self, hg, phabricator_deployment, diff_id):
        if phabricator_deployment == PHAB_PROD:
            api_key = get_secret("PHABRICATOR_TOKEN")
            url = get_secret("PHABRICATOR_URL")
        else:
            api_key = get_secret("PHABRICATOR_DEV_TOKEN")
            url = get_secret("PHABRICATOR_DEV_URL")

        phabricator_api = PhabricatorAPI(api_key=api_key, url=url)

        # Get the stack of patches
        stack = phabricator_api.load_patches_stack(diff_id)
        assert len(stack) > 0, "No patches to apply"

        # Find the first unknown base revision
        needed_stack = []
        revisions = {}
        for patch in reversed(stack):
            needed_stack.insert(0, patch)

            # Stop as soon as a base revision is available
            if self.has_revision(hg, patch.base_revision):
                logger.info(
                    f"Stopping at diff {patch.id} and revision {patch.base_revision}"
                )
                break

        if not needed_stack:
            logger.info("All the patches are already applied")
            return

        # Load all the diff revisions
        diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
        revisions = {
            diff["phid"]: phabricator_api.load_revision(
                rev_phid=diff["revisionPHID"], attachments={"reviewers": True}
            )
            for diff in diffs
        }

        # Update repo to base revision
        hg_base = needed_stack[0].base_revision
        if not self.has_revision(hg, hg_base):
            logger.warning(
                "Missing base revision {} from Phabricator".format(hg_base))
            hg_base = "tip"

        if hg_base:
            hg.update(rev=hg_base, clean=True)
            logger.info(f"Updated repo to {hg_base}")

            if self.git_repo_dir and hg_base != "tip":
                try:
                    self.git_base = tuple(
                        vcs_map.mercurial_to_git(self.git_repo_dir, [hg_base])
                    )[0]
                    subprocess.run(
                        ["git", "checkout", "-b", "analysis_branch", self.git_base],
                        check=True,
                        cwd=self.git_repo_dir,
                    )
                    logger.info(f"Updated git repo to {self.git_base}")
                except Exception as e:
                    logger.info(
                        f"Updating git repo to Mercurial {hg_base} failed: {e}"
                    )

        def load_user(phid):
            if phid.startswith("PHID-USER"):
                return phabricator_api.load_user(user_phid=phid)
            elif phid.startswith("PHID-PROJ"):
                # TODO: Support group reviewers somehow.
                logger.info(f"Skipping group reviewer {phid}")
            else:
                raise Exception(f"Unsupported reviewer {phid}")

        for patch in needed_stack:
            revision = revisions[patch.phid]

            message = "{}\n\n{}".format(revision["fields"]["title"],
                                        revision["fields"]["summary"])
            author_name = None
            author_email = None

            if patch.commits:
                author_name = patch.commits[0]["author"]["name"]
                author_email = patch.commits[0]["author"]["email"]

            if author_name is None:
                author = load_user(revision["fields"]["authorPHID"])
                author_name = author["fields"]["realName"]
                # XXX: Figure out a way to know the email address of the author.
                author_email = author["fields"]["username"]

            reviewers = list(
                filter(
                    None,
                    (
                        load_user(reviewer["reviewerPHID"])
                        for reviewer in revision["attachments"]["reviewers"]["reviewers"]
                    ),
                )
            )
            reviewers = {reviewer["fields"]["username"] for reviewer in reviewers}

            if reviewers:
                message = replace_reviewers(message, reviewers)

            logger.info(
                f"Applying {patch.phid} from revision {revision['id']}: {message}"
            )

            hg.import_(
                patches=io.BytesIO(patch.patch.encode("utf-8")),
                message=message.encode("utf-8"),
                user=f"{author_name} <{author_email}>".encode("utf-8"),
            )

            if self.git_repo_dir:
                # Mirror the changeset we just imported into the git clone via
                # git-cinnabar; hg.root() is the path of the local Mercurial
                # clone backing the `hg` client.
                latest_rev = repository.get_revs(hg, "-1")[0].decode("ascii")
                subprocess.run(
                    ["git", "cinnabar", "fetch",
                     f"hg::{hg.root().decode('ascii')}", latest_rev],
                    check=True,
                    cwd=self.git_repo_dir,
                )

                subprocess.run(
                    [
                        "git",
                        "-c",
                        f"user.name={author_name}",
                        "-c",
                        f"user.email={author_email}",
                        "commit",
                        "-am",
                        message,
                    ],
                    check=True,
                    cwd=self.git_repo_dir,
                )
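apply_phab depends on a has_revision helper that isn't shown here; a plausible minimal version (an assumption, not the class's actual code) asks Mercurial to identify the changeset and treats a lookup failure as unknown:

import hglib

def has_revision(self, hg, revision):
    # Assumed helper: a revision "exists" if `hg identify -r <rev>` succeeds.
    if not revision:
        return False
    try:
        hg.identify(revision)
        return True
    except hglib.error.CommandError:
        return False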
Example #11
    def get_commits_to_ignore(self):
        logger.info("Download previous commits to ignore...")
        db.download(IGNORED_COMMITS_DB)

        logger.info("Get previously classified commits...")
        prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
        logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

        # When we already have some analyzed commits, re-analyze the last 3500 to make sure
        # we didn't miss back-outs that happened since the last analysis.
        if len(prev_commits_to_ignore) > 0:
            first_commit_to_reanalyze = (
                -3500 if len(prev_commits_to_ignore) >= 3500 else 0
            )
            rev_start = "children({})".format(
                prev_commits_to_ignore[first_commit_to_reanalyze]["rev"]
            )
        else:
            rev_start = 0

        with hglib.open(self.mercurial_repo_dir) as hg:
            revs = repository.get_revs(hg, rev_start)

        # Drop commits which are not yet present in the mercurial <-> git mapping.
        while len(revs) > 0:
            try:
                vcs_map.mercurial_to_git(revs[-1].decode("ascii"))
                break
            except Exception as e:
                if not str(e).startswith("Missing mercurial commit in the VCS map"):
                    raise

                revs.pop()

        commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

        repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)

        chosen_commits = set()
        commits_to_ignore = []
        for commit in commits:
            if commit.ignored or commit.backedoutby:
                commits_to_ignore.append(
                    {
                        "rev": commit.node,
                        "type": "backedout" if commit.backedoutby else "",
                    }
                )
                chosen_commits.add(commit.node)

        logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

        for prev_commit in prev_commits_to_ignore[::-1]:
            if prev_commit["rev"] not in chosen_commits:
                commits_to_ignore.append(prev_commit)
                chosen_commits.add(prev_commit["rev"])

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        logger.info(
            "...of which {} are backed-out".format(
                sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
            )
        )

        db.write(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)
        db.upload(IGNORED_COMMITS_DB)
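The tail of this method merges the freshly classified commits with the previously stored ones, letting new classifications win and deduplicating on "rev". The same pattern in isolation (a generic sketch, not bugbug code):

def merge_keep_newest(new_items, old_items, key):
    # New items take precedence; old ones survive only if their key is unseen.
    seen = {key(item) for item in new_items}
    merged = list(new_items)
    for item in reversed(old_items):
        if key(item) not in seen:
            merged.append(item)
            seen.add(key(item))
    return merged

# merge_keep_newest(commits_to_ignore, prev_commits_to_ignore, lambda c: c["rev"])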