Beispiel #1
0
def boot_worker():
    # Clone autoland
    logger.info(f"Cloning mozilla autoland in {REPO_DIR}...")
    repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

    # Download test scheduling DB support files.
    logger.info("Downloading test scheduling DB support files...")
    assert (db.download_support_file(
        test_scheduling.TEST_LABEL_SCHEDULING_DB,
        test_scheduling.PAST_FAILURES_LABEL_DB,
    ) or ALLOW_MISSING_MODELS)

    assert (db.download_support_file(
        test_scheduling.TEST_GROUP_SCHEDULING_DB,
        test_scheduling.PAST_FAILURES_GROUP_DB,
    ) or ALLOW_MISSING_MODELS)

    assert (db.download_support_file(
        test_scheduling.TEST_GROUP_SCHEDULING_DB,
        test_scheduling.TOUCHED_TOGETHER_DB,
    ) or ALLOW_MISSING_MODELS)

    # Download commits DB
    logger.info("Downloading commits DB...")
    commits_db_downloaded = db.download(repository.COMMITS_DB,
                                        support_files_too=True)
    if not ALLOW_MISSING_MODELS:
        assert commits_db_downloaded

    if commits_db_downloaded:
        # And update it
        logger.info("Browsing all commits...")
        for commit in repository.get_commits():
            pass

        rev_start = "children({})".format(commit["node"])
        logger.info("Updating commits DB...")
        commits = repository.download_commits(REPO_DIR,
                                              rev_start,
                                              use_single_process=True)

        if len(commits) > 0:
            # Update the touched together DB.
            update_touched_together_gen = test_scheduling.update_touched_together(
            )
            next(update_touched_together_gen)

            update_touched_together_gen.send(commits[-1]["node"])

            try:
                update_touched_together_gen.send(None)
            except StopIteration:
                pass

    # Preload models
    bugbug_http.models.preload_models()

    logger.info("Worker boot done")
Beispiel #2
0
def boot_worker():
    # Clone autoland
    def clone_autoland():
        logger.info(f"Cloning autoland in {REPO_DIR}...")
        repository.clone(REPO_DIR,
                         "https://hg.mozilla.org/integration/autoland")

    def extract_past_failures_label():
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_LABEL_DB))
            logger.info("Label-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Label-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_failing_together():
        try:
            utils.extract_file(
                os.path.join("data",
                             test_scheduling.FAILING_TOGETHER_LABEL_DB))
            logger.info("Failing together DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Failing together DB not extracted, but missing models are allowed."
            )

    def extract_past_failures_group():
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_GROUP_DB))
            logger.info("Group-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Group-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_touched_together():
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.TOUCHED_TOGETHER_DB))
            logger.info("Touched together DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Touched together DB not extracted, but missing models are allowed."
            )

    def extract_commits():
        try:
            utils.extract_file(f"{repository.COMMITS_DB}.zst")
            logger.info("Commits DB extracted.")
            return True
        except FileNotFoundError:
            logger.info(
                "Commits DB not extracted, but missing models are allowed.")
            assert ALLOW_MISSING_MODELS
            return False

    def extract_commit_experiences():
        try:
            utils.extract_file(
                os.path.join("data", repository.COMMIT_EXPERIENCES_DB))
            logger.info("Commit experiences DB extracted.")
        except FileNotFoundError:
            logger.info(
                "Commit experiences DB not extracted, but missing models are allowed."
            )
            assert ALLOW_MISSING_MODELS

    @tenacity.retry(
        stop=tenacity.stop_after_attempt(7),
        wait=tenacity.wait_exponential(multiplier=1, min=1, max=8),
    )
    def retrieve_schedulable_tasks():
        # Store in a file the list of tasks in the latest autoland push.
        r = requests.get(
            "https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.autoland.latest.taskgraph.decision/artifacts/public/target-tasks.json"
        )
        r.raise_for_status()
        with open("known_tasks", "w") as f:
            f.write("\n".join(r.json()))

    with concurrent.futures.ThreadPoolExecutor() as executor:
        clone_autoland_future = executor.submit(clone_autoland)

        retrieve_schedulable_tasks_future = executor.submit(
            retrieve_schedulable_tasks)

        commits_db_extracted = extract_commits()
        extract_commit_experiences()
        extract_touched_together()
        extract_past_failures_label()
        extract_past_failures_group()
        extract_failing_together()

        if commits_db_extracted:
            # Update the commits DB.
            logger.info("Browsing all commits...")
            for commit in repository.get_commits():
                pass
            logger.info("All commits browsed.")

            # Wait repository to be cloned, as it's required to call repository.download_commits.
            logger.info("Waiting autoland to be cloned...")
            clone_autoland_future.result()

            rev_start = "children({})".format(commit["node"])
            logger.info("Updating commits DB...")
            commits = repository.download_commits(REPO_DIR,
                                                  rev_start,
                                                  use_single_process=True)
            logger.info("Commits DB updated.")

            logger.info("Updating touched together DB...")
            if len(commits) > 0:
                # Update the touched together DB.
                update_touched_together_gen = test_scheduling.update_touched_together(
                )
                next(update_touched_together_gen)

                update_touched_together_gen.send(commits[-1]["node"])

                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass
            logger.info("Touched together DB updated.")

        # Wait list of schedulable tasks to be downloaded and written to disk.
        retrieve_schedulable_tasks_future.result()

    logger.info("Worker boot done")
Beispiel #3
0
def test_touched_together(monkeypatch):
    test_scheduling.touched_together = None

    repository.path_to_component = {
        "dom/file1.cpp": "Core::DOM",
        "dom/file2.cpp": "Core::DOM",
        "layout/file.cpp": "Core::Layout",
        "dom/tests/manifest1.ini": "Core::DOM",
        "dom/tests/manifest2.ini": "Core::DOM",
    }

    commits = [
        repository.Commit(
            node="commit1",
            author="author1",
            desc="commit1",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer1", "reviewer2"),
        ).set_files(["dom/file1.cpp", "dom/tests/manifest1.ini"], {}),
        repository.Commit(
            node="commitbackedout",
            author="author1",
            desc="commitbackedout",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="commitbackout",
            author_email="*****@*****.**",
            reviewers=("reviewer1", "reviewer2"),
        ).set_files(["dom/file1.cpp", "dom/tests/manifest1.ini"], {}),
        repository.Commit(
            node="commit2",
            author="author2",
            desc="commit2",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer1", ),
        ).set_files(["dom/file2.cpp", "layout/tests/manifest2.ini"], {}),
        repository.Commit(
            node="commit3",
            author="author1",
            desc="commit3",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer2", ),
        ).set_files(["layout/file.cpp", "dom/tests/manifest1.ini"], {}),
        repository.Commit(
            node="commit4",
            author="author1",
            desc="commit4",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer1", "reviewer2"),
        ).set_files(["dom/file1.cpp", "dom/tests/manifest1.ini"], {}),
    ]
    commits = [c.to_dict() for c in commits]

    def mock_get_commits():
        return commits

    monkeypatch.setattr(repository, "get_commits", mock_get_commits)

    update_touched_together_gen = test_scheduling.update_touched_together()
    next(update_touched_together_gen)

    update_touched_together_gen.send("commit2")

    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/tests") == 1
    assert test_scheduling.get_touched_together("dom/tests",
                                                "dom/file1.cpp") == 1
    assert test_scheduling.get_touched_together("dom",
                                                "dom/tests/manifest1.ini") == 1
    assert test_scheduling.get_touched_together("dom", "dom/tests") == 1
    assert test_scheduling.get_touched_together("dom", "dom") == 0

    assert test_scheduling.get_touched_together("dom/file2.cpp",
                                                "layout/tests") == 1
    assert (test_scheduling.get_touched_together(
        "dom", "layout/tests/manifest2.ini") == 1)
    assert test_scheduling.get_touched_together("dom", "layout/tests") == 1
    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/file2.cpp") == 0

    assert test_scheduling.get_touched_together("layout/file.cpp",
                                                "dom/tests") == 0
    assert test_scheduling.get_touched_together("layout", "dom/tests") == 0

    update_touched_together_gen.send("commit4")

    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/tests") == 2
    assert test_scheduling.get_touched_together("dom/tests",
                                                "dom/file1.cpp") == 2
    assert test_scheduling.get_touched_together("dom",
                                                "dom/tests/manifest1.ini") == 2
    assert test_scheduling.get_touched_together("dom", "dom/tests") == 2
    assert test_scheduling.get_touched_together("dom", "dom") == 0

    assert test_scheduling.get_touched_together("dom/file2.cpp",
                                                "layout/tests") == 1
    assert (test_scheduling.get_touched_together(
        "dom", "layout/tests/manifest2.ini") == 1)
    assert test_scheduling.get_touched_together("dom", "layout/tests") == 1
    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/file2.cpp") == 0

    assert test_scheduling.get_touched_together("layout/file.cpp",
                                                "dom/tests") == 1
    assert (test_scheduling.get_touched_together(
        "layout", "dom/tests/manifest1.ini") == 1)
    assert test_scheduling.get_touched_together("layout", "dom/tests") == 1
        def generate_all_data() -> Generator[Dict[str, Any], None, None]:
            past_failures = test_scheduling.get_past_failures(
                granularity, False)

            push_num = past_failures[
                "push_num"] if "push_num" in past_failures else 0

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                commit_map[commit_data["node"]] = commit_data

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            if granularity in ("group", "config_group"):
                update_touched_together_gen = test_scheduling.update_touched_together(
                )
                next(update_touched_together_gen)

            for (
                    i,
                (
                    revisions,
                    fix_revision,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
                ),
            ) in enumerate(tqdm(push_data_iter(), total=push_data_count)):
                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                # Skip wptsync commits, since they are not like normal pushes made by developers.
                if any(repository.is_wptsync(commit) for commit in commits):
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We consider only the runnables which run in this push, and the possible and likely regressions
                # from this push. We can't consider all runnables because we can't be sure that a task that didn't
                # run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions +
                        likely_regressions))

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity in ("group", "config_group"):
                    update_touched_together_gen.send(commits[0]["node"])

                result_data = []
                for data in test_scheduling.generate_data(
                        granularity,
                        past_failures,
                        merged_commits,
                        push_num,
                        runnables_to_consider,
                        possible_regressions,
                        likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result_data.append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield {
                        "revs": revisions,
                        "data": result_data,
                    }

            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(
                f"skipped {skipped_no_runnables} (no interesting runnables)")

            past_failures["push_num"] = push_num
            past_failures.close()
def test_touched_together_restart(monkeypatch: MonkeyPatch) -> None:
    test_scheduling.touched_together = None

    repository.path_to_component = {
        "dom/file1.cpp": "Core::DOM",
        "dom/file2.cpp": "Core::DOM",
        "layout/file.cpp": "Core::Layout",
        "dom/tests/manifest1.ini": "Core::DOM",
        "dom/tests/manifest2.ini": "Core::DOM",
    }

    commits = [
        repository.Commit(
            node="commit1",
            author="author1",
            desc="commit1",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer1", "reviewer2"),
        ).set_files(["dom/file1.cpp", "dom/tests/manifest1.ini"], {}),
        repository.Commit(
            node="commitbackedout",
            author="author1",
            desc="commitbackedout",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="commitbackout",
            author_email="*****@*****.**",
            reviewers=("reviewer1", "reviewer2"),
        ).set_files(["dom/file1.cpp", "dom/tests/manifest1.ini"], {}),
        repository.Commit(
            node="commit2",
            author="author2",
            desc="commit2",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer1", ),
        ).set_files(["dom/file2.cpp", "layout/tests/manifest2.ini"], {}),
        repository.Commit(
            node="commit3",
            author="author1",
            desc="commit3",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer2", ),
        ).set_files(["layout/file.cpp", "dom/tests/manifest1.ini"], {}),
        repository.Commit(
            node="commit4",
            author="author1",
            desc="commit4",
            date=datetime(2019, 1, 1),
            pushdate=datetime(2019, 1, 1),
            bug_id=123,
            backsout=[],
            backedoutby="",
            author_email="*****@*****.**",
            reviewers=("reviewer1", "reviewer2"),
        ).set_files(["dom/file1.cpp", "dom/tests/manifest1.ini"], {}),
    ]
    commits = [c.to_dict() for c in commits]

    def mock_get_commits() -> List[CommitDict]:
        return commits

    monkeypatch.setattr(repository, "get_commits", mock_get_commits)

    update_touched_together_gen = test_scheduling.update_touched_together()
    next(update_touched_together_gen)

    update_touched_together_gen.send(Revision("commit2"))

    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/tests") == 1
    assert test_scheduling.get_touched_together("dom/tests",
                                                "dom/file1.cpp") == 1
    assert test_scheduling.get_touched_together("dom",
                                                "dom/tests/manifest1.ini") == 1
    assert test_scheduling.get_touched_together("dom", "dom/tests") == 1
    assert test_scheduling.get_touched_together("dom", "dom") == 0

    assert test_scheduling.get_touched_together("dom/file2.cpp",
                                                "layout/tests") == 1
    assert (test_scheduling.get_touched_together(
        "dom", "layout/tests/manifest2.ini") == 1)
    assert test_scheduling.get_touched_together("dom", "layout/tests") == 1
    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/file2.cpp") == 0

    assert test_scheduling.get_touched_together("layout/file.cpp",
                                                "dom/tests") == 0
    assert test_scheduling.get_touched_together("layout", "dom/tests") == 0

    try:
        update_touched_together_gen.send(None)
    except StopIteration:
        pass

    # Ensure we can still read the DB after closing.
    assert test_scheduling.get_touched_together("dom", "layout/tests") == 1
    test_scheduling.close_touched_together_db()

    update_touched_together_gen = test_scheduling.update_touched_together()
    next(update_touched_together_gen)

    update_touched_together_gen.send(Revision("commit4"))

    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/tests") == 2
    assert test_scheduling.get_touched_together("dom/tests",
                                                "dom/file1.cpp") == 2
    assert test_scheduling.get_touched_together("dom",
                                                "dom/tests/manifest1.ini") == 2
    assert test_scheduling.get_touched_together("dom", "dom/tests") == 2
    assert test_scheduling.get_touched_together("dom", "dom") == 0

    assert test_scheduling.get_touched_together("dom/file2.cpp",
                                                "layout/tests") == 1
    assert (test_scheduling.get_touched_together(
        "dom", "layout/tests/manifest2.ini") == 1)
    assert test_scheduling.get_touched_together("dom", "layout/tests") == 1
    assert test_scheduling.get_touched_together("dom/file1.cpp",
                                                "dom/file2.cpp") == 0

    assert test_scheduling.get_touched_together("layout/file.cpp",
                                                "dom/tests") == 1
    assert (test_scheduling.get_touched_together(
        "layout", "dom/tests/manifest1.ini") == 1)
    assert test_scheduling.get_touched_together("layout", "dom/tests") == 1
        def generate_all_data():
            past_failures = test_scheduling.get_past_failures(granularity)

            push_num = past_failures["push_num"] if "push_num" in past_failures else 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                if not can_start:
                    if last_node == commit_data["node"]:
                        can_start = True

                    continue

                commit_map[commit_data["node"]] = commit_data

            with open(push_data_path, "r") as f:
                push_data = json.load(f)

            logger.info(f"push data nodes: {len(push_data)}")

            if granularity == "label":
                push_data = [
                    (
                        revisions,
                        rename_tasks(push_tasks),
                        rename_tasks(possible_regressions),
                        rename_tasks(likely_regressions),
                    )
                    for revisions, push_tasks, possible_regressions, likely_regressions in push_data
                ]

            # In the last 28 pushes, we definitely run all possible runnables.
            all_runnables_set = set(
                sum((push_runnables for _, push_runnables, _, _ in push_data[-28:]), [])
            )
            # Filter runnables we don't need.
            all_runnables = filter_runnables(
                list(all_runnables_set), all_runnables_set, granularity
            )
            all_runnables_set = set(all_runnables_set)
            logger.info(f"{len(all_runnables_set)} runnables run in the last 28 pushes")

            push_data = [
                (
                    revisions,
                    filter_runnables(push_tasks, all_runnables_set, granularity),
                    filter_runnables(
                        possible_regressions, all_runnables_set, granularity
                    ),
                    filter_runnables(
                        likely_regressions, all_runnables_set, granularity
                    ),
                )
                for revisions, push_tasks, possible_regressions, likely_regressions in push_data
            ]

            if granularity == "label":
                generate_failing_together_probabilities(push_data)

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            if granularity == "group":
                update_touched_together_gen = test_scheduling.update_touched_together()
                next(update_touched_together_gen)

            for i in tqdm(range(len(push_data))):
                (
                    revisions,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
                ) = push_data.pop(0)

                if not can_start:
                    if last_node == revisions[0]:
                        can_start = True

                    continue

                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision)
                    for revision in revisions
                    if revision in commit_map
                )
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We consider only the runnables which run in this push, and the possible and likely regressions
                # from this push. We can't consider all runnables because we can't be sure that a task that didn't
                # run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions + likely_regressions)
                )

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity == "group":
                    update_touched_together_gen.send(commits[0]["node"])

                result = {
                    "revs": revisions,
                    "data": [],
                }
                for data in test_scheduling.generate_data(
                    past_failures,
                    merged_commits,
                    push_num,
                    runnables_to_consider,
                    possible_regressions,
                    likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result["data"].append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield result

            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(f"skipped {skipped_no_runnables} (no interesting runnables)")

            past_failures["push_num"] = push_num
            past_failures.close()
Beispiel #7
0
def boot_worker() -> None:
    # Clone autoland
    def clone_autoland() -> None:
        logger.info(f"Cloning autoland in {REPO_DIR}...")
        repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

    def extract_past_failures_label() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_LABEL_DB)
            )
            logger.info("Label-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Label-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_failing_together_label() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.FAILING_TOGETHER_LABEL_DB)
            )
            logger.info("Failing together label DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Failing together label DB not extracted, but missing models are allowed."
            )

    def extract_failing_together_config_group() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.FAILING_TOGETHER_CONFIG_GROUP_DB)
            )
            logger.info("Failing together config/group DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Failing together config/group DB not extracted, but missing models are allowed."
            )

    def extract_past_failures_group() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_GROUP_DB)
            )
            logger.info("Group-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Group-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_touched_together() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.TOUCHED_TOGETHER_DB)
            )
            logger.info("Touched together DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Touched together DB not extracted, but missing models are allowed."
            )

    def extract_commits() -> bool:
        try:
            utils.extract_file(f"{repository.COMMITS_DB}.zst")
            logger.info("Commits DB extracted.")
            return True
        except FileNotFoundError:
            logger.info("Commits DB not extracted, but missing models are allowed.")
            assert ALLOW_MISSING_MODELS
            return False

    def extract_commit_experiences() -> None:
        try:
            utils.extract_file(os.path.join("data", repository.COMMIT_EXPERIENCES_DB))
            logger.info("Commit experiences DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Commit experiences DB not extracted, but missing models are allowed."
            )

    @tenacity.retry(
        stop=tenacity.stop_after_attempt(7),
        wait=tenacity.wait_exponential(multiplier=1, min=1, max=8),
    )
    def retrieve_schedulable_tasks() -> None:
        r = requests.get(
            "https://hg.mozilla.org/integration/autoland/json-pushes?version=2&tipsonly=1"
        )
        r.raise_for_status()
        revs = [
            push_obj["changesets"][0]
            for push_id, push_obj in r.json()["pushes"].items()
        ]

        logger.info(f"Retrieving known tasks from {revs}")

        # Store in a file the list of tasks in the latest autoland pushes.
        # We use more than one to protect ourselves from broken decision tasks.
        known_tasks = set()
        for rev in revs:
            r = requests.get(
                f"https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.autoland.revision.{rev}.taskgraph.decision/artifacts/public/target-tasks.json"
            )
            if r.ok:
                known_tasks.update(r.json())

        logger.info(f"Retrieved {len(known_tasks)} tasks")

        assert len(known_tasks) > 0

        with open("known_tasks", "w") as f:
            f.write("\n".join(known_tasks))

    with concurrent.futures.ThreadPoolExecutor() as executor:
        clone_autoland_future = executor.submit(clone_autoland)

        retrieve_schedulable_tasks_future = executor.submit(retrieve_schedulable_tasks)

        commits_db_extracted = extract_commits()
        extract_commit_experiences()
        extract_touched_together()
        extract_past_failures_label()
        extract_past_failures_group()
        extract_failing_together_label()
        extract_failing_together_config_group()

        if commits_db_extracted:
            # Update the commits DB.
            logger.info("Browsing all commits...")
            nodes = collections.deque(
                (commit["node"] for commit in repository.get_commits()), maxlen=4096
            )
            nodes.reverse()
            logger.info("All commits browsed.")

            # Wait repository to be cloned, as it's required to call repository.download_commits.
            logger.info("Waiting autoland to be cloned...")
            clone_autoland_future.result()

            with hglib.open(REPO_DIR) as hg:
                # Try using nodes backwards, in case we have some node that was on central at the time
                # we mined commits, but is not yet on autoland.
                for node in nodes:
                    try:
                        revs = repository.get_revs(hg, rev_start=f"children({node})")
                        break
                    except hglib.error.CommandError as e:
                        if b"abort: unknown revision" not in e.err:
                            raise

            logger.info("Updating commits DB...")
            commits = repository.download_commits(
                REPO_DIR, revs=revs, use_single_process=True
            )
            logger.info("Commits DB updated.")

            logger.info("Updating touched together DB...")
            if len(commits) > 0:
                # Update the touched together DB.
                update_touched_together_gen = test_scheduling.update_touched_together()
                next(update_touched_together_gen)

                update_touched_together_gen.send(commits[-1]["node"])

                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass
            logger.info("Touched together DB updated.")

        # Wait list of schedulable tasks to be downloaded and written to disk.
        retrieve_schedulable_tasks_future.result()

    logger.info("Worker boot done")
Beispiel #8
0
def boot_worker():
    # Clone autoland
    def clone_autoland():
        logger.info(f"Cloning autoland in {REPO_DIR}...")
        repository.clone(REPO_DIR,
                         "https://hg.mozilla.org/integration/autoland")

    def extract_past_failures_label():
        try:
            utils.extract_file(test_scheduling.PAST_FAILURES_LABEL_DB)
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS

        logger.info("Label-level past failures DB extracted.")

    def extract_past_failures_group():
        try:
            utils.extract_file(test_scheduling.PAST_FAILURES_GROUP_DB)
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS

        logger.info("Group-level past failures DB extracted.")

    def extract_touched_together():
        try:
            utils.extract_file(test_scheduling.TOUCHED_TOGETHER_DB)
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS

        logger.info("Touched together DB extracted.")

    def extract_commits():
        try:
            utils.extract_file(f"{repository.COMMITS_DB}.zst")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            return False

        logger.info("Commits DB extracted.")
        return True

    def extract_commit_experiences():
        try:
            utils.extract_file(repository.COMMIT_EXPERIENCES_DB)
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS

        logger.info("Commit experiences DB extracted.")

    with concurrent.futures.ThreadPoolExecutor() as executor:
        clone_autoland_future = executor.submit(clone_autoland)

        commits_db_extracted = extract_commits()
        extract_commit_experiences()
        extract_touched_together()
        extract_past_failures_label()
        extract_past_failures_group()

        if commits_db_extracted:
            # Update the commits DB.
            logger.info("Browsing all commits...")
            for commit in repository.get_commits():
                pass
            logger.info("All commits browsed.")

            # Wait repository to be cloned, as it's required to call repository.download_commits.
            logger.info("Waiting autoland to be cloned...")
            clone_autoland_future.result()

            rev_start = "children({})".format(commit["node"])
            logger.info("Updating commits DB...")
            commits = repository.download_commits(REPO_DIR,
                                                  rev_start,
                                                  use_single_process=True)
            logger.info("Commits DB updated.")

            logger.info("Updating touched together DB...")
            if len(commits) > 0:
                # Update the touched together DB.
                update_touched_together_gen = test_scheduling.update_touched_together(
                )
                next(update_touched_together_gen)

                update_touched_together_gen.send(commits[-1]["node"])

                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass
            logger.info("Touched together DB updated.")

    logger.info("Worker boot done")