Example #1
    def select_tests(
        self,
        commits: Sequence[repository.CommitDict],
        confidence: float = 0.5,
        push_num: Optional[int] = None,
    ) -> Dict[str, float]:
        commit_data = commit_features.merge_commits(commits)

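        # Load the stored past-failures data for this model's granularity.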
        past_failures_data = test_scheduling.get_past_failures(self.granularity, True)

        if push_num is None:
            push_num = past_failures_data["push_num"] + 1
        all_runnables = past_failures_data["all_runnables"]

        commit_tests = []
        for data in test_scheduling.generate_data(
            self.granularity,
            past_failures_data,
            commit_data,
            push_num,
            all_runnables,
            tuple(),
            tuple(),
        ):
            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

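        # Classify each (commit, runnable) pair and keep the runnables whose failure
        # probability is at least `confidence` (probabilities are floored to two decimals).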
        probs = self.classify(commit_tests, probabilities=True)
        selected_indexes = np.argwhere(probs[:, 1] >= confidence)[:, 0]
        return {
            commit_tests[i]["test_job"]["name"]: math.floor(probs[i, 1] * 100) / 100
            for i in selected_indexes
        }
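A minimal, hypothetical driver around Example #1's select_tests, reusing names that appear elsewhere on this page (MODEL_CACHE from bugbug_http.models, repository.get_commits); the model name, the commit slice, and the threshold are illustrative assumptions, not code from the project:

from bugbug import repository
from bugbug_http.models import MODEL_CACHE

# Pick the label-granularity selection model and a few recent commits (illustrative only).
model = MODEL_CACHE.get("testlabelselect")
commits = list(repository.get_commits())[-3:]
# select_tests returns {runnable name: probability floored to two decimals}.
selected = model.select_tests(commits, confidence=0.5)
for name, prob in sorted(selected.items(), key=lambda kv: -kv[1]):
    print(f"{prob:.2f}  {name}")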
Example #2
    def get_runnables(granularity):
        past_failures_data = test_scheduling.get_past_failures(granularity)

        push_num = past_failures_data["push_num"]
        all_runnables = past_failures_data["all_runnables"]

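        # NOTE: commit_data and test_selection_threshold are assumed to come from the
        # enclosing scope; they are not defined in this snippet.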
        commit_tests = []
        for data in test_scheduling.generate_data(past_failures_data,
                                                  commit_data, push_num,
                                                  all_runnables, [], []):
            if granularity == "label" and not data["name"].startswith("test-"):
                continue

            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = MODEL_CACHE.get(f"test{granularity}select").classify(
            commit_tests, probabilities=True)
        selected_indexes = np.argwhere(
            probs[:, 1] > test_selection_threshold)[:, 0]
        return {
            commit_tests[i]["test_job"]["name"]:
            math.floor(probs[i, 1] * 100) / 100
            for i in selected_indexes
        }
Example #3
    def __init__(self, model_name, repo_dir, git_repo_dir, method_defect_predictor_dir):
        self.model_name = model_name
        self.repo_dir = repo_dir

        self.model = download_and_load_model(model_name)
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "8cc47f47ffb686a29324435a0151b5fabd37f865",
            )

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            updated = download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_X_path}.zst")
            )
            if updated:
                zstd_decompress(model_data_X_path)
            assert os.path.exists(model_data_X_path), "Decompressed X dataset exists"

            model_data_y_path = f"{model_name}model_data_y"
            updated = download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_y_path}.zst")
            )
            if updated:
                zstd_decompress(model_data_y_path)
            assert os.path.exists(model_data_y_path), "Decompressed y dataset exists"

            self.X = to_array(joblib.load(model_data_X_path))
            self.y = to_array(joblib.load(model_data_y_path))

            past_bugs_by_function_path = "data/past_bugs_by_function.pickle"
            download_check_etag(
                PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
            )
            zstd_decompress(past_bugs_by_function_path)
            assert os.path.exists(past_bugs_by_function_path)
            with open(past_bugs_by_function_path, "rb") as f:
                self.past_bugs_by_function = pickle.load(f)

        if model_name == "testlabelselect":
            self.use_test_history = True
            assert db.download_support_file(
                test_scheduling.TEST_LABEL_SCHEDULING_DB,
                test_scheduling.PAST_FAILURES_LABEL_DB,
            )
            self.past_failures_data = test_scheduling.get_past_failures("label")

            self.testfailure_model = download_and_load_model("testfailure")
            assert self.testfailure_model is not None
Example #4
def get_config_specific_groups(config: str) -> str:
    from bugbug_http.app import JobInfo

    job = JobInfo(get_config_specific_groups, config)
    LOGGER.info(f"Processing {job}...")

    testgroupselect_model = MODEL_CACHE.get("testgroupselect")
    equivalence_sets = testgroupselect_model._get_equivalence_sets(0.9)

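    # The group-granularity past-failures data also stores the list of all known groups.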
    past_failures_data = test_scheduling.get_past_failures("group", True)
    all_runnables = past_failures_data["all_runnables"]

    setkey(
        job.result_key,
        orjson.dumps(
            [
                {"name": group}
                for group in all_runnables
                if any(
                    equivalence_set == {config}
                    for equivalence_set in equivalence_sets[group]
                )
            ]
        ),
        compress=True,
    )

    return "OK"
Example #5
    def select_tests(self, commits, confidence=0.3, push_num=None):
        commit_data = commit_features.merge_commits(commits)

        past_failures_data = test_scheduling.get_past_failures(
            self.granularity)

        if push_num is None:
            push_num = past_failures_data["push_num"] + 1
        all_runnables = past_failures_data["all_runnables"]

        if self.granularity == "label":
            all_runnables = tuple(r for r in all_runnables
                                  if r.startswith("test-"))

        commit_tests = []
        for data in test_scheduling.generate_data(past_failures_data,
                                                  commit_data, push_num,
                                                  all_runnables, tuple(),
                                                  tuple()):
            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = self.classify(commit_tests, probabilities=True)
        selected_indexes = np.argwhere(probs[:, 1] >= confidence)[:, 0]
        return {
            commit_tests[i]["test_job"]["name"]:
            math.floor(probs[i, 1] * 100) / 100
            for i in selected_indexes
        }
Example #6
def mock_schedule_tests_classify(monkeypatch):
    with open("known_tasks", "w") as f:
        f.write("prova")

    # Initialize a mock past failures DB.
    for granularity in ("label", "group"):
        past_failures_data = test_scheduling.get_past_failures(granularity)
        past_failures_data["push_num"] = 1
        past_failures_data["all_runnables"] = [
            f"test-{granularity}1",
            f"test-{granularity}2",
            "test-linux64/opt",
            "test-windows10/opt",
        ]
        past_failures_data.close()

    failing_together = test_scheduling.get_failing_together_db()
    failing_together[b"test-linux64/opt$test-windows10/opt"] = struct.pack(
        "ff", 0.1, 1.0)
    test_scheduling.close_failing_together_db()

    def do_mock(labels_to_choose, groups_to_choose):
        # Add a mock test selection model.
        def classify(self, items, probabilities=False):
            assert probabilities
            results = []
            for item in items:
                runnable_name = item["test_job"]["name"]
                if self.granularity == "label":
                    if runnable_name in labels_to_choose:
                        results.append([
                            1 - labels_to_choose[runnable_name],
                            labels_to_choose[runnable_name],
                        ])
                    else:
                        results.append([0.9, 0.1])
                elif self.granularity == "group":
                    if runnable_name in groups_to_choose:
                        results.append([
                            1 - groups_to_choose[runnable_name],
                            groups_to_choose[runnable_name],
                        ])
                    else:
                        results.append([0.9, 0.1])
            return np.array(results)

        class MockModelCache:
            def get(self, model_name):
                if "group" in model_name:
                    return bugbug.models.testselect.TestGroupSelectModel()
                else:
                    return bugbug.models.testselect.TestLabelSelectModel()

        monkeypatch.setattr(bugbug_http.models, "MODEL_CACHE",
                            MockModelCache())
        monkeypatch.setattr(bugbug.models.testselect.TestSelectModel,
                            "classify", classify)

    return do_mock
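A test would then presumably call the returned do_mock with dicts that map runnable names to the positive-class probability the mocked classify should report, e.g. do_mock({"test-label1": 0.9}, {"test-group1": 0.9}); the names and values here are illustrative, not taken from the original code.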
Example #7
    def __init__(self, model_name, cache_root, git_repo_dir,
                 method_defect_predictor_dir):
        self.model_name = model_name
        self.cache_root = cache_root

        assert os.path.isdir(
            cache_root), f"Cache root {cache_root} is not a dir."
        self.repo_dir = os.path.join(cache_root, "mozilla-central")

        self.model = self.load_model(model_name)
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo("https://github.com/mozilla/gecko-dev",
                                git_repo_dir)

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "fa5269b959d8ddf7e97d1e92523bb64c17f9bbcd",
            )

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            if not os.path.exists(model_data_X_path):
                download_check_etag(
                    URL.format(model_name=model_name,
                               file_name=f"{model_data_X_path}.zst"))
                zstd_decompress(model_data_X_path)
                assert os.path.exists(
                    model_data_X_path), "Decompressed X dataset exists"

            model_data_y_path = f"{model_name}model_data_y"
            if not os.path.exists(model_data_y_path):
                download_check_etag(
                    URL.format(model_name=model_name,
                               file_name=f"{model_data_y_path}.zst"))
                zstd_decompress(model_data_y_path)
                assert os.path.exists(
                    model_data_y_path), "Decompressed y dataset exists"

            self.X = to_array(joblib.load(model_data_X_path))
            self.y = to_array(joblib.load(model_data_y_path))

        if model_name == "testselect":
            self.use_test_history = True
            assert db.download_support_file(test_scheduling.TEST_SCHEDULING_DB,
                                            test_scheduling.PAST_FAILURES_DB)
            self.past_failures_data = test_scheduling.get_past_failures()

            self.backout_model = self.load_model("backout")
            assert self.backout_model is not None
Example #8
File: conftest.py Project: mozilla/bugbug
def mock_get_config_specific_groups(
    monkeypatch: MonkeyPatch,
) -> None:
    with open("known_tasks", "w") as f:
        f.write("prova")

    # Initialize a mock past failures DB.
    past_failures_data = test_scheduling.get_past_failures("group", False)
    past_failures_data["push_num"] = 1
    past_failures_data["all_runnables"] = [
        "test-group1",
        "test-group2",
    ]
    past_failures_data.close()

    try:
        test_scheduling.close_failing_together_db("config_group")
    except AssertionError:
        pass
    failing_together = test_scheduling.get_failing_together_db("config_group", False)
    failing_together[b"$ALL_CONFIGS$"] = pickle.dumps(
        ["test-linux1804-64/opt-*", "test-windows10/debug-*", "test-windows10/opt-*"]
    )
    failing_together[b"$CONFIGS_BY_GROUP$"] = pickle.dumps(
        {
            "test-group1": {
                "test-linux1804-64/opt-*",
                "test-windows10/debug-*",
                "test-windows10/opt-*",
            },
            "test-group2": {
                "test-linux1804-64/opt-*",
                "test-windows10/debug-*",
                "test-windows10/opt-*",
            },
        }
    )
    failing_together[b"test-group1"] = pickle.dumps(
        {
            "test-linux1804-64/opt-*": {
                "test-windows10/debug-*": (1.0, 0.0),
                "test-windows10/opt-*": (1.0, 0.0),
            },
            "test-windows10/debug-*": {
                "test-windows10/opt-*": (1.0, 1.0),
            },
        }
    )
    test_scheduling.close_failing_together_db("config_group")

    monkeypatch.setattr(bugbug_http.models, "MODEL_CACHE", MockModelCache())
Example #9
def mock_schedule_tests_classify(monkeypatch):
    # Initialize a mock past failures DB.
    for granularity in ("label", "group"):
        past_failures_data = test_scheduling.get_past_failures(granularity)
        past_failures_data["push_num"] = 1
        past_failures_data["all_runnables"] = [
            f"test-{granularity}1",
            f"test-{granularity}2",
        ]
        past_failures_data.close()

    def do_mock(labels_to_choose, groups_to_choose):
        # Add a mock test selection model.
        class Model:
            def __init__(self, name):
                self.name = name

            def classify(self, items, probabilities=False):
                assert probabilities
                results = []
                for item in items:
                    runnable_name = item["test_job"]["name"]
                    if self.name == "testlabelselect":
                        if runnable_name in labels_to_choose:
                            results.append(
                                [
                                    1 - labels_to_choose[runnable_name],
                                    labels_to_choose[runnable_name],
                                ]
                            )
                        else:
                            results.append([0.9, 0.1])
                    elif self.name == "testgroupselect":
                        if runnable_name in groups_to_choose:
                            results.append(
                                [
                                    1 - groups_to_choose[runnable_name],
                                    groups_to_choose[runnable_name],
                                ]
                            )
                        else:
                            results.append([0.9, 0.1])
                return np.array(results)

        class MockModelCache:
            def get(self, model_name):
                return Model(model_name)

        monkeypatch.setattr(bugbug_http.models, "MODEL_CACHE", MockModelCache())

    return do_mock
Example #10
    def _get_equivalence_sets(self, min_redundancy_confidence: float):
        try:
            with open(
                f"equivalence_sets_{min_redundancy_confidence}.pickle", "rb"
            ) as f:
                return pickle.load(f)
        except FileNotFoundError:
            past_failures_data = test_scheduling.get_past_failures(
                self.granularity, True
            )
            all_runnables = past_failures_data["all_runnables"]

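            # For each group, compute which sets of configurations are equivalent
            # according to the failing-together statistics.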
            equivalence_sets = {}
            failing_together = test_scheduling.get_failing_together_db(
                "config_group", True
            )
            all_configs = pickle.loads(failing_together[b"$ALL_CONFIGS$"])
            configs_by_group = pickle.loads(failing_together[b"$CONFIGS_BY_GROUP$"])
            for group in all_runnables:
                key = test_scheduling.failing_together_key(group)
                try:
                    failing_together_stats = pickle.loads(failing_together[key])
                except KeyError:
                    failing_together_stats = {}

                def load_failing_together(
                    config: str,
                ) -> Dict[str, Tuple[float, float]]:
                    return failing_together_stats[config]

                configs = (
                    configs_by_group[group]
                    if group in configs_by_group
                    else all_configs
                )

                equivalence_sets[group] = self._generate_equivalence_sets(
                    configs, min_redundancy_confidence, load_failing_together, True
                )

            with open(
                f"equivalence_sets_{min_redundancy_confidence}.pickle", "wb"
            ) as f:
                pickle.dump(equivalence_sets, f)

            return equivalence_sets
Example #11
def mock_schedule_tests_classify(tmpdir, monkeypatch):
    os.chdir(tmpdir)
    os.makedirs("data")

    # Initialize a mock past failures DB.
    for granularity in ("label", "group"):
        past_failures_data = test_scheduling.get_past_failures(granularity)
        past_failures_data["push_num"] = 1
        past_failures_data["all_runnables"] = [
            f"test-{granularity}1",
            f"test-{granularity}2",
        ]
        past_failures_data.close()

    def do_mock(labels_to_choose, groups_to_choose):
        # Add a mock test selection model.
        class Model:
            def __init__(self, name):
                self.name = name

            def classify(self, items, probabilities=False):
                assert probabilities
                results = []
                for item in items:
                    if self.name == "testlabelselect":
                        if item["test_job"]["name"] in labels_to_choose:
                            results.append([0.1, 0.9])
                        else:
                            results.append([0.9, 0.1])
                    elif self.name == "testgroupselect":
                        if item["test_job"]["name"] in groups_to_choose:
                            results.append([0.1, 0.9])
                        else:
                            results.append([0.9, 0.1])
                return np.array(results)

        def mock_get_model(modelname):
            return Model(modelname)

        monkeypatch.setattr(bugbug_http.models, "get_model", mock_get_model)

    return do_mock
Example #12
        def generate_all_data():
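            # NOTE: granularity, last_node, push_data_path and the rename/filter helpers
            # are assumed to come from the enclosing function; they are not shown here.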
            past_failures = test_scheduling.get_past_failures(granularity)

            push_num = past_failures["push_num"] if "push_num" in past_failures else 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = last_node is None

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                if not can_start:
                    if last_node == commit_data["node"]:
                        can_start = True

                    continue

                commit_map[commit_data["node"]] = commit_data

            with open(push_data_path, "r") as f:
                push_data = json.load(f)

            logger.info(f"push data nodes: {len(push_data)}")

            if granularity == "label":
                push_data = [
                    (
                        revisions,
                        rename_tasks(push_tasks),
                        rename_tasks(possible_regressions),
                        rename_tasks(likely_regressions),
                    )
                    for revisions, push_tasks, possible_regressions, likely_regressions in push_data
                ]

            # In the last 28 pushes, we definitely run all possible runnables.
            all_runnables_set = set(
                sum((push_runnables for _, push_runnables, _, _ in push_data[-28:]), [])
            )
            # Filter runnables we don't need.
            all_runnables = filter_runnables(
                list(all_runnables_set), all_runnables_set, granularity
            )
            all_runnables_set = set(all_runnables_set)
            logger.info(f"{len(all_runnables_set)} runnables run in the last 28 pushes")

            push_data = [
                (
                    revisions,
                    filter_runnables(push_tasks, all_runnables_set, granularity),
                    filter_runnables(
                        possible_regressions, all_runnables_set, granularity
                    ),
                    filter_runnables(
                        likely_regressions, all_runnables_set, granularity
                    ),
                )
                for revisions, push_tasks, possible_regressions, likely_regressions in push_data
            ]

            if granularity == "label":
                generate_failing_together_probabilities(push_data)

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = last_node is None

            if granularity == "group":
                update_touched_together_gen = test_scheduling.update_touched_together()
                next(update_touched_together_gen)

            for i in tqdm(range(len(push_data))):
                (
                    revisions,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
                ) = push_data.pop(0)

                if not can_start:
                    if last_node == revisions[0]:
                        can_start = True

                    continue

                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision)
                    for revision in revisions
                    if revision in commit_map
                )
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We consider only the runnables which run in this push, and the possible and likely regressions
                # from this push. We can't consider all runnables because we can't be sure that a task that didn't
                # run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions + likely_regressions)
                )

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity == "group":
                    update_touched_together_gen.send(commits[0]["node"])

                result = {
                    "revs": revisions,
                    "data": [],
                }
                for data in test_scheduling.generate_data(
                    past_failures,
                    merged_commits,
                    push_num,
                    runnables_to_consider,
                    possible_regressions,
                    likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result["data"].append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield result

            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(f"skipped {skipped_no_runnables} (no interesting runnables)")

            past_failures["push_num"] = push_num
            past_failures.close()
Example #13
File: conftest.py Project: mozilla/bugbug
def mock_schedule_tests_classify(
    monkeypatch: MonkeyPatch,
) -> Callable[[dict[str, float], dict[str, float]], None]:
    with open("known_tasks", "w") as f:
        f.write("prova")

    # Initialize a mock past failures DB.
    for granularity in ("label", "group"):
        past_failures_data = test_scheduling.get_past_failures(granularity, False)
        past_failures_data["push_num"] = 1
        past_failures_data["all_runnables"] = [
            "test-linux1804-64-opt-label1",
            "test-linux1804-64-opt-label2",
            "test-group1",
            "test-group2",
            "test-linux1804-64/opt",
            "test-windows10/opt",
        ]
        past_failures_data.close()

    try:
        test_scheduling.close_failing_together_db("label")
    except AssertionError:
        pass
    failing_together = test_scheduling.get_failing_together_db("label", False)
    failing_together[b"test-linux1804-64/opt"] = pickle.dumps(
        {
            "test-windows10/opt": (0.1, 1.0),
        }
    )
    test_scheduling.close_failing_together_db("label")

    try:
        test_scheduling.close_failing_together_db("config_group")
    except AssertionError:
        pass
    failing_together = test_scheduling.get_failing_together_db("config_group", False)
    failing_together[b"$ALL_CONFIGS$"] = pickle.dumps(
        ["test-linux1804-64/opt", "test-windows10/debug", "test-windows10/opt"]
    )
    failing_together[b"$CONFIGS_BY_GROUP$"] = pickle.dumps(
        {
            "test-group1": {
                "test-linux1804-64/opt",
                "test-windows10/debug",
                "test-windows10/opt",
            },
            "test-group2": {
                "test-linux1804-64/opt",
                "test-windows10/debug",
                "test-windows10/opt",
            },
        }
    )
    failing_together[b"test-group1"] = pickle.dumps(
        {
            "test-linux1804-64/opt": {
                "test-windows10/debug": (1.0, 0.0),
                "test-windows10/opt": (1.0, 1.0),
            },
            "test-windows10/debug": {
                "test-windows10/opt": (1.0, 0.0),
            },
        }
    )
    test_scheduling.close_failing_together_db("config_group")

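    # Make sure an (empty) touched-together DB exists and is closed before the test runs.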
    try:
        test_scheduling.close_touched_together_db()
    except AssertionError:
        pass
    test_scheduling.get_touched_together_db(False)
    test_scheduling.close_touched_together_db()

    def do_mock(labels_to_choose, groups_to_choose):
        # Add a mock test selection model.
        def classify(self, items, probabilities=False):
            assert probabilities
            results = []
            for item in items:
                runnable_name = item["test_job"]["name"]
                if self.granularity == "label":
                    if runnable_name in labels_to_choose:
                        results.append(
                            [
                                1 - labels_to_choose[runnable_name],
                                labels_to_choose[runnable_name],
                            ]
                        )
                    else:
                        results.append([0.9, 0.1])
                elif self.granularity == "group":
                    if runnable_name in groups_to_choose:
                        results.append(
                            [
                                1 - groups_to_choose[runnable_name],
                                groups_to_choose[runnable_name],
                            ]
                        )
                    else:
                        results.append([0.9, 0.1])
            return np.array(results)

        monkeypatch.setattr(bugbug_http.models, "MODEL_CACHE", MockModelCache())
        monkeypatch.setattr(
            bugbug.models.testselect.TestSelectModel, "classify", classify
        )

    return do_mock
Example #14
    def __init__(
        self, model_name, cache_root, git_repo_dir, method_defect_predictor_dir
    ):
        self.model_name = model_name
        self.cache_root = cache_root

        assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
        self.repo_dir = os.path.join(cache_root, "mozilla-central")

        self.model = download_and_load_model(model_name)
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "fa5269b959d8ddf7e97d1e92523bb64c17f9bbcd",
            )

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            updated = download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_X_path}.zst")
            )
            if updated:
                zstd_decompress(model_data_X_path)
            assert os.path.exists(model_data_X_path), "Decompressed X dataset exists"

            model_data_y_path = f"{model_name}model_data_y"
            updated = download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_y_path}.zst")
            )
            if updated:
                zstd_decompress(model_data_y_path)
            assert os.path.exists(model_data_y_path), "Decompressed y dataset exists"

            self.X = to_array(joblib.load(model_data_X_path))
            self.y = to_array(joblib.load(model_data_y_path))

            past_bugs_by_function_path = "data/past_bugs_by_function.pickle"
            download_check_etag(
                PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
            )
            zstd_decompress(past_bugs_by_function_path)
            assert os.path.exists(past_bugs_by_function_path)
            with open(past_bugs_by_function_path, "rb") as f:
                self.past_bugs_by_function = pickle.load(f)

        if model_name == "testselect":
            self.use_test_history = True
            assert db.download_support_file(
                test_scheduling.TEST_SCHEDULING_DB, test_scheduling.PAST_FAILURES_DB
            )
            self.past_failures_data = test_scheduling.get_past_failures()

            self.testfailure_model = download_and_load_model("testfailure")
            assert self.testfailure_model is not None
Example #15
def test_generate_data(granularity):
    past_failures = test_scheduling.get_past_failures(granularity)

    commits = [
        {
            "types": ["C/C++"],
            "files": ["dom/file1.cpp"],
            "directories": ["dom"],
            "components": ["DOM"],
        },
        {
            "types": ["C/C++"],
            "files": ["dom/file1.cpp", "dom/file2.cpp"],
            "directories": ["dom"],
            "components": ["DOM"],
        },
        {
            "types": ["C/C++"],
            "files": ["layout/file.cpp"],
            "directories": ["layout"],
            "components": ["Layout"],
        },
        {
            "types": ["C/C++"],
            "files": ["layout/file.cpp"],
            "directories": ["layout"],
            "components": ["Layout"],
        },
        {
            "types": ["JavaScript", "C/C++"],
            "files": ["dom/file1.cpp", "dom/file1.js"],
            "directories": ["dom"],
            "components": ["DOM"],
        },
    ]

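    # First push: no failures have been recorded yet, so every counter in the
    # generated data is expected to be zero.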
    data = list(
        test_scheduling.generate_data(past_failures, commits[0], 1,
                                      ["runnable1", "runnable2"], [], []))
    assert len(data) == 2
    assert data[0] == {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable1",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }
    assert data[1] == {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable2",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }

    data = list(
        test_scheduling.generate_data(past_failures, commits[1], 2,
                                      ["runnable1", "runnable2"],
                                      ["runnable1"], []))
    assert len(data) == 2
    assert data[0] == {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable1",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }
    assert data[1] == {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable2",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }

    data = list(
        test_scheduling.generate_data(past_failures, commits[2], 3,
                                      ["runnable1", "runnable2"], [],
                                      ["runnable2"]))
    assert len(data) == 2
    assert data[0] == {
        "failures": 1,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 1,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 1,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 1,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable1",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }
    assert data[1] == {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": True,
        "is_possible_regression": False,
        "name": "runnable2",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }

    data = list(
        test_scheduling.generate_data(past_failures, commits[3], 4,
                                      ["runnable1"], [], []))
    assert len(data) == 1
    assert data[0] == {
        "failures": 1,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 1,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 1,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 1,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable1",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }

    data = list(
        test_scheduling.generate_data(
            past_failures,
            commits[4],
            1500,
            ["runnable1", "runnable2"],
            ["runnable1", "runnable2"],
            [],
        ))
    assert len(data) == 2
    assert data[0] == {
        "failures": 1,
        "failures_in_components": 1,
        "failures_in_directories": 1,
        "failures_in_files": 1,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 1,
        "failures_past_2800_pushes_in_directories": 1,
        "failures_past_2800_pushes_in_files": 1,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable1",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }
    assert data[1] == {
        "failures": 1,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable2",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }

    data = list(
        test_scheduling.generate_data(
            past_failures,
            commits[4],
            2400,
            ["runnable1", "runnable2"],
            ["runnable1", "runnable2"],
            [],
        ))
    assert len(data) == 2
    assert data[0] == {
        "failures": 2,
        "failures_in_components": 2,
        "failures_in_directories": 2,
        "failures_in_files": 3,
        "failures_in_types": 3,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 1,
        "failures_past_1400_pushes_in_directories": 1,
        "failures_past_1400_pushes_in_files": 2,
        "failures_past_1400_pushes_in_types": 2,
        "failures_past_2800_pushes": 2,
        "failures_past_2800_pushes_in_components": 2,
        "failures_past_2800_pushes_in_directories": 2,
        "failures_past_2800_pushes_in_files": 3,
        "failures_past_2800_pushes_in_types": 3,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable1",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }
    assert data[1] == {
        "failures": 2,
        "failures_in_components": 1,
        "failures_in_directories": 1,
        "failures_in_files": 2,
        "failures_in_types": 3,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 1,
        "failures_past_1400_pushes_in_directories": 1,
        "failures_past_1400_pushes_in_files": 2,
        "failures_past_1400_pushes_in_types": 2,
        "failures_past_2800_pushes": 2,
        "failures_past_2800_pushes_in_components": 1,
        "failures_past_2800_pushes_in_directories": 1,
        "failures_past_2800_pushes_in_files": 2,
        "failures_past_2800_pushes_in_types": 3,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable2",
        "touched_together_directories": 0,
        "touched_together_files": 0,
    }
Example #16
        def generate_all_data() -> Generator[Dict[str, Any], None, None]:
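            # NOTE: granularity, all_runnables, push_data_iter and push_data_count are
            # assumed to come from the enclosing function; they are not shown here.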
            past_failures = test_scheduling.get_past_failures(
                granularity, False)

            push_num = past_failures[
                "push_num"] if "push_num" in past_failures else 0

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                commit_map[commit_data["node"]] = commit_data

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            if granularity in ("group", "config_group"):
                update_touched_together_gen = test_scheduling.update_touched_together(
                )
                next(update_touched_together_gen)

            for (
                    i,
                (
                    revisions,
                    fix_revision,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
                ),
            ) in enumerate(tqdm(push_data_iter(), total=push_data_count)):
                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                # Skip wptsync commits, since they are not like normal pushes made by developers.
                if any(repository.is_wptsync(commit) for commit in commits):
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We consider only the runnables which run in this push, and the possible and likely regressions
                # from this push. We can't consider all runnables because we can't be sure that a task that didn't
                # run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions +
                        likely_regressions))

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity in ("group", "config_group"):
                    update_touched_together_gen.send(commits[0]["node"])

                result_data = []
                for data in test_scheduling.generate_data(
                        granularity,
                        past_failures,
                        merged_commits,
                        push_num,
                        runnables_to_consider,
                        possible_regressions,
                        likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result_data.append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield {
                        "revs": revisions,
                        "data": result_data,
                    }

            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(
                f"skipped {skipped_no_runnables} (no interesting runnables)")

            past_failures["push_num"] = push_num
            past_failures.close()
Example #17
    def evaluation(self) -> None:
        # Get a test set of pushes on which to test the model.
        pushes, train_push_len = self.get_pushes(False)

        # To evaluate the model with reductions enabled, we need to regenerate the failing together DB, using
        # only failure data from the training pushes (otherwise, we'd leak training information into the test
        # set).
        print("Generate failing together DB (restricted to training pushes)")
        push_data_iter, push_data_count, _ = test_scheduling.get_push_data(
            "label" if self.granularity == "label" else "config_group"
        )
        test_scheduling.generate_failing_together_probabilities(
            "label" if self.granularity == "label" else "config_group",
            push_data_iter(),
            push_data_count,
            pushes[train_push_len - 1]["revs"][0],
        )

        test_pushes_list = pushes[train_push_len:]

        all_tasks = reduce(
            lambda x, y: x | y,
            (
                set(push["failures"]) | set(push["passes"])
                for push in test_pushes_list[-28:]
            ),
        )

        all_revs = set(sum((push["revs"] for push in test_pushes_list), []))

        test_pushes_failures = sum(
            1 for push in test_pushes_list if len(push["failures"]) > 0
        )

        test_pushes = {push["revs"][0]: push for push in test_pushes_list}

        if self.granularity == "group":
            for (
                revisions,
                fix_revision,
                push_runnables,
                possible_regressions,
                likely_regressions,
            ) in tqdm(push_data_iter(), total=push_data_count):
                if revisions[0] not in test_pushes:
                    continue

                test_pushes[revisions[0]]["config_group_failures"] = (
                    possible_regressions + likely_regressions
                )

        print(
            f"Testing on {len(test_pushes)} ({test_pushes_failures} with failures) out of {len(pushes)}. {len(all_tasks)} schedulable tasks."
        )

        del pushes

        commit_map = get_commit_map(all_revs)

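        # push_num is the number of pushes processed when the past-failures data was generated.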
        past_failures_data = test_scheduling.get_past_failures(self.granularity, True)
        last_push_num = past_failures_data["push_num"]
        past_failures_data.close()

        # Select tests for all the pushes in the test set.
        for i, push in enumerate(tqdm(test_pushes.values())):
            commits = tuple(
                commit_map.pop(revision)
                for revision in push["revs"]
                if revision in commit_map
            )
            if len(commits) == 0:
                push["all_possibly_selected"] = {}
                continue

            push_num = last_push_num - (len(test_pushes) - (i + 1))

            # Note: we subtract 100 to the push number to make sure we don't use
            # past failure data for the push itself.
            # The number 100 comes from the fact that in the past failure data
            # generation we store past failures in batches of 100 pushes.
            push["all_possibly_selected"] = self.select_tests(
                commits, 0.5, push_num - 100
            )

        def do_eval(
            executor: concurrent.futures.ProcessPoolExecutor,
            confidence_threshold: float,
            reduction: Optional[float],
            cap: Optional[int],
            minimum: Optional[int],
        ) -> None:
            futures: Dict[concurrent.futures.Future, Dict[str, Any]] = {}
            for push in test_pushes.values():
                futures[
                    executor.submit(
                        eval_apply_transforms,
                        self,
                        push,
                        confidence_threshold,
                        reduction,
                        cap,
                        minimum,
                    )
                ] = push

            for future in concurrent.futures.as_completed(futures):
                exc = future.exception()
                if exc is not None:
                    print(
                        "Exception {} while running {}".format(
                            exc, futures[future]["revs"][0]
                        )
                    )
                    for f in futures:
                        f.cancel()

                push = futures[future]
                selected, group_configs = future.result()

                if reduction is not None and self.granularity == "group":
                    push["number_configs"] = len(
                        set(
                            sum(
                                group_configs.values(),
                                [],
                            )
                        )
                    )
                    selected_config_groups = set(
                        (config, group)
                        for group, configs in group_configs.items()
                        for config in configs
                    )
                    caught_config_groups = selected_config_groups & set(
                        push["config_group_failures"]
                    )
                    push["caught_one_config_group"] = (
                        len(caught_config_groups) > 0
                        if len(push["config_group_failures"]) != 0
                        else None
                    )
                    push["caught_percentage_config_group"] = (
                        len(caught_config_groups) / len(push["config_group_failures"])
                        if len(push["config_group_failures"]) != 0
                        else None
                    )

                caught = selected & set(push["failures"])

                push["number_scheduled"] = len(selected)
                push["caught_one"] = (
                    len(caught) > 0 if len(push["failures"]) != 0 else None
                )
                push["some_didnt_run"] = (
                    not selected.issubset(set(push["passes"]) | set(push["failures"])),
                )
                push["caught_percentage"] = (
                    len(caught) / len(push["failures"])
                    if len(push["failures"]) != 0
                    else None
                )

            min_scheduled = min(
                result["number_scheduled"] for result in test_pushes.values()
            )
            max_scheduled = max(
                result["number_scheduled"] for result in test_pushes.values()
            )
            average_scheduled = statistics.mean(
                result["number_scheduled"] for result in test_pushes.values()
            )
            num_failing_pushes = sum(
                1 for result in test_pushes.values() if result["caught_one"] is not None
            )
            num_caught_one = sum(
                1 for result in test_pushes.values() if result["caught_one"]
            )
            num_caught_one_or_some_didnt_run = sum(
                1
                for result in test_pushes.values()
                if result["caught_one"]
                or (result["caught_one"] is not None and result["some_didnt_run"])
            )
            percentage_caught_one = 100 * num_caught_one / num_failing_pushes
            percentage_caught_one_or_some_didnt_run = (
                100 * num_caught_one_or_some_didnt_run / num_failing_pushes
            )
            average_caught_percentage = 100 * statistics.mean(
                result["caught_percentage"]
                for result in test_pushes.values()
                if result["caught_percentage"] is not None
            )

            reduction_str = (
                f"enabled at {reduction * 100}%"
                if reduction is not None
                else "disabled"
            )

            message = f"For confidence threshold {confidence_threshold}, with reduction {reduction_str}, cap at {cap}, and minimum at {minimum}: scheduled {average_scheduled} tasks on average (min {min_scheduled}, max {max_scheduled}). In {percentage_caught_one}% of pushes we caught at least one failure ({percentage_caught_one_or_some_didnt_run}% ignoring misses when some of our selected tasks didn't run). On average, we caught {average_caught_percentage}% of all seen failures."

            if reduction is not None and self.granularity == "group":
                average_configs = statistics.mean(
                    result["number_configs"] for result in test_pushes.values()
                )
                median_configs = statistics.median(
                    result["number_configs"] for result in test_pushes.values()
                )
                message += f" On average, we selected {average_configs} configs (a median of {median_configs} configs)."

                num_caught_one_config_group = sum(
                    1
                    for result in test_pushes.values()
                    if result["caught_one_config_group"]
                )
                percentage_caught_one_config_group = (
                    100 * num_caught_one_config_group / num_failing_pushes
                )
                average_caught_percentage_config_group = 100 * statistics.mean(
                    result["caught_percentage_config_group"]
                    for result in test_pushes.values()
                    if result["caught_percentage_config_group"] is not None
                )

                message += f" In {percentage_caught_one_config_group}% of pushes we caught at least one config/group failure. On average, we caught {average_caught_percentage_config_group}% of all seen config/group failures."

            print(message)

        with concurrent.futures.ProcessPoolExecutor(
            max_workers=utils.get_physical_cpu_count()
        ) as executor:
            scenarios = [
                (None, None, None),
                (10, None, None),
                (None, 300, None),
                (None, None, 0.9),
                (None, None, 1.0),
            ]
            for minimum, cap, reduction in scenarios:
                # Pre-generate equivalence sets, so when we run the config selection in multiple processes
                # we don't risk concurrent writes to the equivalence sets file.
                if reduction is not None and self.granularity == "group":
                    self._get_equivalence_sets(reduction)

                for confidence_threshold in [0.5, 0.7, 0.8, 0.85, 0.9, 0.95]:
                    do_eval(executor, confidence_threshold, reduction, cap, minimum)
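
The transforms swept above (confidence threshold, minimum, cap, and optionally reduction) are applied per push by eval_apply_transforms. As a rough illustration of how the threshold, minimum, and cap steps compose on a {task name: confidence} mapping, here is a minimal, self-contained sketch based on the do_eval logic shown in the next example; reduction is omitted because it needs the failing-together data, and the helper name apply_transforms_sketch is made up for this illustration.

from typing import Dict, Optional, Set


def apply_transforms_sketch(
    possibly_selected: Dict[str, float],
    confidence_threshold: float,
    cap: Optional[int],
    minimum: Optional[int],
) -> Set[str]:
    # Keep every task at or above the confidence threshold.
    selected = {
        name
        for name, confidence in possibly_selected.items()
        if confidence >= confidence_threshold
    }

    # Too few selected: top up with the next most confident remaining tasks.
    if minimum is not None and len(selected) < minimum:
        remaining = sorted(
            (item for item in possibly_selected.items() if item[0] not in selected),
            key=lambda item: -item[1],
        )
        selected.update(name for name, _ in remaining[: minimum - len(selected)])

    # Too many selected: keep only the `cap` most confident tasks.
    if cap is not None and len(selected) > cap:
        selected = set(
            sorted(selected, key=lambda name: possibly_selected[name], reverse=True)[:cap]
        )

    return selected


# With a 0.8 threshold only task-a survives; the minimum of 2 then pulls in
# task-b, the next most confident task.
print(apply_transforms_sketch({"task-a": 0.9, "task-b": 0.6, "task-c": 0.55}, 0.8, None, 2))
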
Example #18
0
    def evaluation(self):
        # Get a test set of pushes on which to test the model.
        pushes, train_push_len = self.get_pushes(False)

        # To evaluate the model with reductions enabled, we need to regenerate the failing together DB, using
        # only failure data from the training pushes (otherwise, we'd leak training information into the test
        # set).
        if self.granularity == "label":
            print(
                "Generate failing together DB (restricted to training pushes)")
            push_data, _ = test_scheduling.get_push_data("label")
            test_scheduling.generate_failing_together_probabilities(
                push_data, pushes[train_push_len - 1]["revs"][0])

        test_pushes = pushes[train_push_len:]

        all_tasks = reduce(
            lambda x, y: x | y,
            (set(push["failures"]) | set(push["passes"])
             for push in test_pushes[-28:]),
        )

        test_pushes_failures = sum(1 for push in test_pushes
                                   if len(push["failures"]) > 0)

        test_pushes = {push["revs"][0]: push for push in test_pushes}

        print(
            f"Testing on {len(test_pushes)} ({test_pushes_failures} with failures) out of {len(pushes)}. {len(all_tasks)} schedulable tasks."
        )

        commit_map = get_commit_map()

        past_failures_data = test_scheduling.get_past_failures(
            self.granularity)
        last_push_num = past_failures_data["push_num"]
        past_failures_data.close()

        # Select tests for all the pushes in the test set.
        for i, (rev, push) in enumerate(tqdm(test_pushes.items())):
            commits = tuple(commit_map[revision] for revision in push["revs"]
                            if revision in commit_map)
            if len(commits) == 0:
                test_pushes[rev]["all_possibly_selected"] = {}
                continue

            push_num = last_push_num - (len(test_pushes) - (i + 1))

            # Note: we subtract 100 from the push number to make sure we don't use
            # past failure data for the push itself.
            # The number 100 comes from the fact that past failure data
            # generation stores past failures in batches of 100 pushes.
            test_pushes[rev]["all_possibly_selected"] = self.select_tests(
                commits, 0.3, push_num - 100)

        reductions = [None]
        if self.granularity == "label":
            reductions += [0.9, 1.0]

        def do_eval(confidence_threshold, reduction, cap, minimum):
            for rev, push in test_pushes.items():
                selected = set(name for name, confidence in
                               push["all_possibly_selected"].items()
                               if confidence >= confidence_threshold)

                if minimum is not None and len(selected) < minimum:
                    remaining = [(name, confidence) for name, confidence in
                                 push["all_possibly_selected"].items()
                                 if name not in selected]
                    selected.update(name for name, _ in sorted(
                        remaining, key=lambda x: -x[1])[:minimum -
                                                        len(selected)])

                if reduction is not None:
                    selected = self.reduce(selected, reduction)

                if cap is not None and len(selected) > cap:
                    selected = set(
                        sorted(
                            ((name, confidence) for name, confidence in
                             push["all_possibly_selected"].items()
                             if name in selected),
                            key=lambda x: x[1],
                            reverse=True,
                        )[:cap])

                caught = selected & set(push["failures"])

                push["number_scheduled"] = len(selected)
                push["caught_one"] = (len(caught) > 0
                                      if len(push["failures"]) != 0 else None)
                push["some_didnt_run"] = (not selected.issubset(
                    set(push["passes"]) | set(push["failures"])), )
                push["caught_percentage"] = (len(caught) /
                                             len(push["failures"])
                                             if len(push["failures"]) != 0 else
                                             None)

            min_scheduled = min(result["number_scheduled"]
                                for result in test_pushes.values())
            max_scheduled = max(result["number_scheduled"]
                                for result in test_pushes.values())
            average_scheduled = statistics.mean(
                result["number_scheduled"] for result in test_pushes.values())
            num_failing_pushes = sum(1 for result in test_pushes.values()
                                     if result["caught_one"] is not None)
            num_caught_one = sum(1 for result in test_pushes.values()
                                 if result["caught_one"])
            num_caught_one_or_some_didnt_run = sum(
                1 for result in test_pushes.values()
                if result["caught_one"] or (result["caught_one"] is not None
                                            and result["some_didnt_run"]))
            percentage_caught_one = 100 * num_caught_one / num_failing_pushes
            percentage_caught_one_or_some_didnt_run = (
                100 * num_caught_one_or_some_didnt_run / num_failing_pushes)
            average_caught_percentage = 100 * statistics.mean(
                result["caught_percentage"] for result in test_pushes.values()
                if result["caught_percentage"] is not None)

            reduction_str = (f"enabled at {reduction * 100}%"
                             if reduction is not None else "disabled")

            print(
                f"For confidence threshold {confidence_threshold}, with reduction {reduction_str}, and cap at {cap}: scheduled {average_scheduled} tasks on average (min {min_scheduled}, max {max_scheduled}). In {percentage_caught_one}% of pushes we caught at least one failure ({percentage_caught_one_or_some_didnt_run}% ignoring misses when some of our selected tasks didn't run). On average, we caught {average_caught_percentage}% of all seen failures."
            )

        for minimum in [None, 10]:
            for cap in [None, 300, 500]:
                for reduction in reductions:
                    for confidence_threshold in [
                            0.5, 0.7, 0.8, 0.85, 0.9, 0.95
                    ]:
                        do_eval(confidence_threshold, reduction, cap, minimum)
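
The summary statistics printed by do_eval treat pushes whose caught_one is None (i.e. pushes with no failures at all) as out of scope for the recall-style percentages. A small worked example on made-up per-push results, using the same field names as above:

import statistics

# Made-up per-push results, in the shape the evaluation loop above produces.
results = [
    {"number_scheduled": 10, "caught_one": True, "caught_percentage": 0.5},
    {"number_scheduled": 3, "caught_one": False, "caught_percentage": 0.0},
    {"number_scheduled": 7, "caught_one": None, "caught_percentage": None},
]

average_scheduled = statistics.mean(r["number_scheduled"] for r in results)  # ~6.67
# Pushes whose caught_one is None had no failures, so they are excluded from
# the recall-style percentages.
num_failing_pushes = sum(1 for r in results if r["caught_one"] is not None)  # 2
num_caught_one = sum(1 for r in results if r["caught_one"])  # 1
percentage_caught_one = 100 * num_caught_one / num_failing_pushes  # 50.0
average_caught_percentage = 100 * statistics.mean(
    r["caught_percentage"] for r in results if r["caught_percentage"] is not None
)  # 25.0
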
Example #19
0
        def generate_all_data():
            past_failures = test_scheduling.get_past_failures()

            push_num = past_failures[
                "push_num"] if "push_num" in past_failures else 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = last_node is None

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                if not can_start:
                    if last_node == commit_data["node"]:
                        can_start = True

                    continue

                commit_map[commit_data["node"]] = commit_data

            with open("push_data.json", "r") as f:
                push_data = json.load(f)[1:]

            logger.info(f"push data nodes: {len(push_data)}")

            # In the last 28 pushes, we definitely run all possible tasks.
            all_tasks_set = set(
                sum((push_tasks for _, push_tasks, _, _ in push_data[-28:]),
                    []))
            # Filter tasks we don't need.
            all_tasks = filter_tasks(list(all_tasks_set), all_tasks_set)
            all_tasks_set = set(all_tasks)
            logger.info(
                f"{len(all_tasks_set)} tasks run in the last 28 pushes")

            # Store all tasks in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_tasks"] = all_tasks
            # XXX: Should we recreate the DB from scratch if the previous all_tasks are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_tasks = 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = last_node is None

            for i in tqdm(range(len(push_data))):
                (
                    revisions,
                    push_tasks,
                    possible_regressions,
                    likely_regressions,
                ) = push_data.pop(0)

                if not can_start:
                    if last_node == revisions[0]:
                        can_start = True

                    continue

                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_tasks, we'd generate a huge amount of data.
                # So we consider only the tasks which run in this push, and the possible and likely regressions
                # from this push.
                tasks_to_consider = list(
                    set(push_tasks + possible_regressions +
                        likely_regressions))
                tasks_to_consider = filter_tasks(tasks_to_consider,
                                                 all_tasks_set)

                if len(tasks_to_consider) == 0:
                    skipped_no_tasks += 1
                    continue

                # Sync the DB every 250 pushes, so we clean up the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                for data in test_scheduling.generate_data(
                        past_failures,
                        merged_commits,
                        push_num,
                        tasks_to_consider,
                        possible_regressions,
                        likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        saved_nodes.add(i)
                        data["revs"] = revisions
                        yield data

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(f"skipped {skipped_no_tasks} (no interesting tasks)")

            past_failures["push_num"] = push_num
            past_failures.close()
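
generate_all_data above resumes from last_node by skipping everything up to and including the last revision processed in the previous run, both when building the commit map and when walking the push data. That resumption pattern, isolated as a standalone sketch (the resume_after helper is hypothetical, made up for this illustration):

def resume_after(items, last_node):
    # Skip items until we have passed last_node, then yield the rest.
    # When last_node is None there is nothing to skip.
    can_start = last_node is None
    for item in items:
        if not can_start:
            if item == last_node:
                can_start = True
            continue
        yield item


print(list(resume_after(["rev1", "rev2", "rev3", "rev4"], "rev2")))  # ['rev3', 'rev4']
print(list(resume_after(["rev1", "rev2", "rev3", "rev4"], None)))    # all four revisions
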
Example #20
0
    def __init__(
        self,
        model_name: str,
        repo_dir: str,
        git_repo_dir: str,
        method_defect_predictor_dir: str,
        use_single_process: bool,
        skip_feature_importance: bool,
    ):
        self.model_name = model_name
        self.repo_dir = repo_dir

        self.model = Model.load(download_model(model_name))
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo(
                "hg::https://hg.mozilla.org/mozilla-central", git_repo_dir
            )

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "8cc47f47ffb686a29324435a0151b5fabd37f865",
            )

        self.use_single_process = use_single_process
        self.skip_feature_importance = skip_feature_importance

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            updated = download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_X_path}.zst")
            )
            if updated:
                zstd_decompress(model_data_X_path)
            assert os.path.exists(model_data_X_path), "Decompressed X dataset exists"

            model_data_y_path = f"{model_name}model_data_y"
            updated = download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_y_path}.zst")
            )
            if updated:
                zstd_decompress(model_data_y_path)
            assert os.path.exists(model_data_y_path), "Decompressed y dataset exists"

            with open(model_data_X_path, "rb") as fb:
                self.X = to_array(pickle.load(fb))

            with open(model_data_y_path, "rb") as fb:
                self.y = to_array(pickle.load(fb))

            past_bugs_by_function_path = "data/past_fixed_bugs_by_function.json"
            download_check_etag(
                PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
            )
            zstd_decompress(past_bugs_by_function_path)
            assert os.path.exists(past_bugs_by_function_path)
            with open(past_bugs_by_function_path, "r") as f:
                self.past_bugs_by_function = json.load(f)

        if model_name == "testlabelselect":
            self.use_test_history = True
            assert db.download_support_file(
                test_scheduling.TEST_LABEL_SCHEDULING_DB,
                test_scheduling.PAST_FAILURES_LABEL_DB,
            )
            self.past_failures_data = test_scheduling.get_past_failures("label", True)

            self.testfailure_model = cast(
                TestFailureModel, TestFailureModel.load(download_model("testfailure"))
            )
            assert self.testfailure_model is not None
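
The regressor branch above repeats the same download-if-changed / decompress / assert sequence for both the X and y datasets. A hedged sketch of that sequence factored into a helper: the fetch_dataset helper itself is hypothetical, it reuses the surrounding module's download_check_etag, zstd_decompress, and URL names rather than being fully standalone, and it assumes (as the code above suggests) that download_check_etag returns whether the remote file changed and that zstd_decompress produces the file without the .zst suffix.

import os


def fetch_dataset(model_name: str, suffix: str) -> str:
    # Hypothetical helper mirroring the repeated download/decompress pattern above.
    path = f"{model_name}model_data_{suffix}"
    updated = download_check_etag(
        URL.format(model_name=model_name, file_name=f"{path}.zst")
    )
    if updated:
        zstd_decompress(path)
    assert os.path.exists(path), f"Decompressed {suffix} dataset exists"
    return path


# model_data_X_path = fetch_dataset("regressor", "X")
# model_data_y_path = fetch_dataset("regressor", "y")
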
Example #21
0
File: testselect.py Project: e7dal/bugbug
    def evaluation(self):
        # Get a test set of pushes on which to test the model.
        pushes, train_push_len = self.get_pushes()

        test_pushes = pushes[train_push_len:]

        all_tasks = reduce(
            lambda x, y: x | y,
            (set(push["failures"]) | set(push["passes"])
             for push in test_pushes[-28:]),
        )

        test_pushes = {push["revs"][0]: push for push in test_pushes}

        print(
            f"Testing on {len(pushes) - train_push_len} out of {len(pushes)}. {len(all_tasks)} schedulable tasks."
        )

        commit_map = get_commit_map()

        past_failures_data = test_scheduling.get_past_failures(
            self.granularity)
        last_push_num = past_failures_data["push_num"]
        past_failures_data.close()

        # Select tests for all the pushes in the test set.
        for i, (rev, push) in enumerate(tqdm(test_pushes.items())):
            commits = tuple(commit_map[revision] for revision in push["revs"]
                            if revision in commit_map)
            if len(commits) == 0:
                test_pushes[rev]["all_possibly_selected"] = {}
                continue

            push_num = last_push_num - (len(test_pushes) - (i + 1))

            # Note: we subtract 100 from the push number to make sure we don't use
            # past failure data for the push itself.
            # The number 100 comes from the fact that past failure data
            # generation stores past failures in batches of 100 pushes.
            test_pushes[rev]["all_possibly_selected"] = self.select_tests(
                commits, 0.3, push_num - 100)

        reductions = [None]
        if self.granularity == "label":
            reductions += [0.7, 0.8, 0.9, 1.0]

        for reduction in reductions:
            for confidence_threshold in [0.3, 0.5, 0.7, 0.8]:
                for rev, push in test_pushes.items():
                    selected = set(name for name, confidence in
                                   push["all_possibly_selected"].items()
                                   if confidence >= confidence_threshold)

                    if reduction is not None:
                        selected = self.reduce(selected, reduction)

                    caught = selected & set(push["failures"])

                    push["number_scheduled"] = len(selected)
                    push["caught_one"] = (len(caught) > 0 if
                                          len(push["failures"]) != 0 else None)
                    push["some_didnt_run"] = (not selected.issubset(
                        set(push["passes"]) | set(push["failures"])), )
                    push["caught_percentage"] = (len(caught) /
                                                 len(push["failures"])
                                                 if len(push["failures"]) != 0
                                                 else None)

                min_scheduled = min(result["number_scheduled"]
                                    for result in test_pushes.values())
                max_scheduled = max(result["number_scheduled"]
                                    for result in test_pushes.values())
                average_scheduled = statistics.mean(
                    result["number_scheduled"]
                    for result in test_pushes.values())
                num_failing_pushes = sum(1 for result in test_pushes.values()
                                         if result["caught_one"] is not None)
                num_caught_one = sum(1 for result in test_pushes.values()
                                     if result["caught_one"])
                num_caught_one_or_some_didnt_run = sum(
                    1 for result in test_pushes.values()
                    if result["caught_one"] or (
                        result["caught_one"] is not None
                        and result["some_didnt_run"]))
                percentage_caught_one = 100 * num_caught_one / num_failing_pushes
                percentage_caught_one_or_some_didnt_run = (
                    100 * num_caught_one_or_some_didnt_run /
                    num_failing_pushes)
                average_caught_percentage = 100 * statistics.mean(
                    result["caught_percentage"]
                    for result in test_pushes.values()
                    if result["caught_percentage"] is not None)

                reduction_str = (f"enabled at {reduction * 100}%"
                                 if reduction is not None else "disabled")

                print(
                    f"For confidence threshold {confidence_threshold}, with reduction {reduction_str}: scheduled {average_scheduled} tasks on average (min {min_scheduled}, max {max_scheduled}). In {percentage_caught_one}% of pushes we caught at least one failure ({percentage_caught_one_or_some_didnt_run}% ignoring misses when some of our selected tasks didn't run). On average, we caught {average_caught_percentage}% of all seen failures."
                )
Example #22
0
def test_select_configs(failing_together_config_group: LMDBDict) -> None:
    past_failures_data = test_scheduling.get_past_failures("group", False)
    past_failures_data["all_runnables"] = ["group1", "group2"]
    past_failures_data.close()

    failing_together_config_group[b"group1"] = pickle.dumps({
        "linux1804-64-asan/debug": {
            "linux1804-64/debug": (1.0, 0.0),
            "linux1804-64/opt": (1.0, 0.0),
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 0.0),
        },
        "linux1804-64/debug": {
            "linux1804-64/opt": (1.0, 1.0),
            "mac/debug": (1.0, 1.0),
            "windows10/debug": (1.0, 1.0),
        },
        "linux1804-64/opt": {
            "mac/debug": (1.0, 1.0),
            "windows10/debug": (1.0, 1.0),
        },
        "mac/debug": {
            "windows10/debug": (1.0, 1.0)
        },
    })
    failing_together_config_group[b"group2"] = pickle.dumps({
        "linux1804-64-asan/debug": {
            "linux1804-64/debug": (1.0, 1.0),
            "linux1804-64/opt": (1.0, 0.0),
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 0.0),
        },
        "linux1804-64/debug": {
            "linux1804-64/opt": (1.0, 0.0),
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 1.0),
        },
        "linux1804-64/opt": {
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 0.0),
        },
        "mac/debug": {
            "windows10/debug": (1.0, 0.0)
        },
    })
    failing_together_config_group[b"$ALL_CONFIGS$"] = pickle.dumps([
        "linux1804-64-asan/debug",
        "linux1804-64/debug",
        "linux1804-64/opt",
        "mac/debug",
        "windows10/debug",
    ])
    failing_together_config_group[b"$CONFIGS_BY_GROUP$"] = pickle.dumps({
        "group1": {
            "linux1804-64-asan/debug",
            "linux1804-64/debug",
            "linux1804-64/opt",
            "mac/debug",
            "windows10/debug",
        },
        "group2": {
            "linux1804-64-asan/debug",
            "linux1804-64/debug",
            "linux1804-64/opt",
            "mac/debug",
            "windows10/debug",
        },
    })
    test_scheduling.close_failing_together_db("config_group")

    model = TestGroupSelectModel()
    result = model.select_configs(
        {
            "group1",
            "group2",
        },
        1.0,
    )
    assert len(result) == 2
    assert set(
        result["group1"]) == {"linux1804-64-asan/debug", "linux1804-64/opt"}
    assert set(result["group2"]) == {
        "linux1804-64/opt",
        "mac/debug",
        "linux1804-64/debug",
    }
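
In this fixture each group maps a configuration to the configurations it has been observed failing together with, as a tuple read here as (support, confidence); that reading is inferred from the assertions, where pairs whose second element is 1.0 end up treated as redundant, so only one configuration of such a pair is selected. Because each unordered pair is stored only once, a lookup has to try both orientations; a small sketch with a hypothetical helper:

def failing_together_stats(group_stats, config_a, config_b):
    # Each unordered pair of configs is stored under only one of the two keys.
    if config_a in group_stats and config_b in group_stats[config_a]:
        return group_stats[config_a][config_b]
    return group_stats[config_b][config_a]


group1_stats = {
    "linux1804-64/debug": {"linux1804-64/opt": (1.0, 1.0)},
    "linux1804-64/opt": {},
}

# Same answer whichever order we ask in.
print(failing_together_stats(group1_stats, "linux1804-64/opt", "linux1804-64/debug"))
print(failing_together_stats(group1_stats, "linux1804-64/debug", "linux1804-64/opt"))
# (1.0, 1.0): the two configurations always fail together for this group, so
# scheduling either one of them is enough.
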
Example #23
0
def test_generate_data(granularity: str) -> None:
    past_failures = test_scheduling.get_past_failures(granularity, False)

    commits = [
        CommitDict({
            "types": ["C/C++"],
            "files": ["dom/file1.cpp"],
            "directories": ["dom"],
            "components": ["DOM"],
        }),
        CommitDict({
            "types": ["C/C++"],
            "files": ["dom/file1.cpp", "dom/file2.cpp"],
            "directories": ["dom"],
            "components": ["DOM"],
        }),
        CommitDict({
            "types": ["C/C++"],
            "files": ["layout/file.cpp"],
            "directories": ["layout"],
            "components": ["Layout"],
        }),
        CommitDict({
            "types": ["C/C++"],
            "files": ["layout/file.cpp"],
            "directories": ["layout"],
            "components": ["Layout"],
        }),
        CommitDict({
            "types": ["JavaScript", "C/C++"],
            "files": ["dom/file1.cpp", "dom/file1.js"],
            "directories": ["dom"],
            "components": ["DOM"],
        }),
    ]

    data = list(
        test_scheduling.generate_data(
            granularity,
            past_failures,
            commits[0],
            1,
            ["runnable1", "runnable2"],
            [],
            [],
        ))
    assert len(data) == 2
    obj = {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable1",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[0] == obj

    obj = {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable2",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[1] == obj

    data = list(
        test_scheduling.generate_data(
            granularity,
            past_failures,
            commits[1],
            2,
            ["runnable1", "runnable2"],
            ["runnable1"],
            [],
        ))
    assert len(data) == 2
    obj = {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable1",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[0] == obj
    obj = {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable2",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[1] == obj

    data = list(
        test_scheduling.generate_data(
            granularity,
            past_failures,
            commits[2],
            3,
            ["runnable1", "runnable2"],
            [],
            ["runnable2"],
        ))
    assert len(data) == 2
    obj = {
        "failures": 1,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 1,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 1,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 1,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable1",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[0] == obj
    obj = {
        "failures": 0,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 0,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 0,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 0,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": True,
        "is_possible_regression": False,
        "name": "runnable2",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[1] == obj

    data = list(
        test_scheduling.generate_data(granularity, past_failures, commits[3],
                                      4, ["runnable1"], [], []))
    assert len(data) == 1
    obj = {
        "failures": 1,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 1,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 1,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 1,
        "is_likely_regression": False,
        "is_possible_regression": False,
        "name": "runnable1",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[0] == obj

    data = list(
        test_scheduling.generate_data(
            granularity,
            past_failures,
            commits[4],
            1500,
            ["runnable1", "runnable2"],
            ["runnable1", "runnable2"],
            [],
        ))
    assert len(data) == 2
    obj = {
        "failures": 1,
        "failures_in_components": 1,
        "failures_in_directories": 1,
        "failures_in_files": 1,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 1,
        "failures_past_2800_pushes_in_directories": 1,
        "failures_past_2800_pushes_in_files": 1,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable1",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[0] == obj
    obj = {
        "failures": 1,
        "failures_in_components": 0,
        "failures_in_directories": 0,
        "failures_in_files": 0,
        "failures_in_types": 1,
        "failures_past_1400_pushes": 0,
        "failures_past_1400_pushes_in_components": 0,
        "failures_past_1400_pushes_in_directories": 0,
        "failures_past_1400_pushes_in_files": 0,
        "failures_past_1400_pushes_in_types": 0,
        "failures_past_2800_pushes": 1,
        "failures_past_2800_pushes_in_components": 0,
        "failures_past_2800_pushes_in_directories": 0,
        "failures_past_2800_pushes_in_files": 0,
        "failures_past_2800_pushes_in_types": 1,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable2",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[1] == obj

    data = list(
        test_scheduling.generate_data(
            granularity,
            past_failures,
            commits[4],
            2400,
            ["runnable1", "runnable2"],
            ["runnable1", "runnable2"],
            [],
        ))
    assert len(data) == 2
    obj = {
        "failures": 2,
        "failures_in_components": 2,
        "failures_in_directories": 2,
        "failures_in_files": 3,
        "failures_in_types": 3,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 1,
        "failures_past_1400_pushes_in_directories": 1,
        "failures_past_1400_pushes_in_files": 2,
        "failures_past_1400_pushes_in_types": 2,
        "failures_past_2800_pushes": 2,
        "failures_past_2800_pushes_in_components": 2,
        "failures_past_2800_pushes_in_directories": 2,
        "failures_past_2800_pushes_in_files": 3,
        "failures_past_2800_pushes_in_types": 3,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable1",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[0] == obj
    obj = {
        "failures": 2,
        "failures_in_components": 1,
        "failures_in_directories": 1,
        "failures_in_files": 2,
        "failures_in_types": 3,
        "failures_past_1400_pushes": 1,
        "failures_past_1400_pushes_in_components": 1,
        "failures_past_1400_pushes_in_directories": 1,
        "failures_past_1400_pushes_in_files": 2,
        "failures_past_1400_pushes_in_types": 2,
        "failures_past_2800_pushes": 2,
        "failures_past_2800_pushes_in_components": 1,
        "failures_past_2800_pushes_in_directories": 1,
        "failures_past_2800_pushes_in_files": 2,
        "failures_past_2800_pushes_in_types": 3,
        "failures_past_700_pushes": 0,
        "failures_past_700_pushes_in_components": 0,
        "failures_past_700_pushes_in_directories": 0,
        "failures_past_700_pushes_in_files": 0,
        "failures_past_700_pushes_in_types": 0,
        "is_likely_regression": False,
        "is_possible_regression": True,
        "name": "runnable2",
    }
    if granularity == "group":
        obj["touched_together_directories"] = 0
        obj["touched_together_files"] = 0
    assert data[1] == obj
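
The expected dictionaries above accumulate past failures over sliding windows of 700, 1400, and 2800 pushes. A minimal sketch of that windowing, under the semantics the assertions imply (a past failure at push p counts for a later push q when q - p is at most the window size); the window_counts helper is made up for this illustration:

WINDOWS = (700, 1400, 2800)


def window_counts(failure_push_nums, current_push_num):
    # Total failures ever seen, plus one counter per sliding window.
    counts = {"failures": len(failure_push_nums)}
    for window in WINDOWS:
        counts[f"failures_past_{window}_pushes"] = sum(
            1 for p in failure_push_nums if current_push_num - p <= window
        )
    return counts


# runnable1 is recorded as failing (a possible regression) at pushes 2 and 1500
# in the test above; by push 2400 only the second failure is still inside the
# 1400-push window, and neither is inside the 700-push window.
print(window_counts([2, 1500], 2400))
# {'failures': 2, 'failures_past_700_pushes': 0,
#  'failures_past_1400_pushes': 1, 'failures_past_2800_pushes': 2}
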