def generate_component_connections(self, bug_map: Dict[int,
                                                           bugzilla.BugDict],
                                       bugs: List[int]) -> None:
        bugs_set = set(bugs)
        commits = [
            commit for commit in repository.get_commits()
            if commit["bug_id"] in bugs_set
        ]
        commit_map = {commit["node"]: commit for commit in commits}

        # Retrieve components of test failures that occurred when landing patches to fix bugs in specific components.
        component_failures = collections.defaultdict(list)

        push_data_iter, push_data_count, all_runnables = test_scheduling.get_push_data(
            "group")

        for revisions, _, _, possible_regressions, likely_regressions in tqdm(
                push_data_iter(), total=push_data_count):
            commit_list = [
                commit_map[revision] for revision in revisions
                if revision in commit_map
            ]
            if len(commit_list) == 0:
                continue

            commit_bugs = [
                bug_map[commit["bug_id"]] for commit in commit_list
                if commit["bug_id"] in bug_map
            ]

            components = list(
                set(get_full_component(bug) for bug in commit_bugs))

            groups = [
                group for group in list(
                    set(possible_regressions + likely_regressions))
                if group.encode("utf-8") in self.path_to_component
            ]

            for group in groups:
                for component in components:
                    component_failures[component].append(
                        self.path_to_component[group.encode(
                            "utf-8")].tobytes().decode("utf-8"))

        # Filter out commits for which we have no bugs.
        commits = [commit for commit in commits if commit["bug_id"] in bug_map]

        # Sort commits by bug component, so we can use itertools.groupby to group them by bug component.
        commits.sort(key=lambda x: get_full_component(bug_map[x["bug_id"]]))

        commit_groups = []
        for component, commit_iter in itertools.groupby(
                commits, lambda x: get_full_component(bug_map[x["bug_id"]])):
            commit_group = {
                "component": component,
                "most_common_test_failure_components":
                histogram(component_failures[component])
                if component in component_failures else {},
            }
            self.get_prev_bugs_stats(
                commit_group,
                list(commit_iter),
                component,
            )
            commit_groups.append(commit_group)

        with open("component_connections.json", "w") as f:
            json.dump(commit_groups, f)

        repository.close_component_mapping()
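
The final sort in the method above is load-bearing: itertools.groupby only merges consecutive items with equal keys, so grouping commits by component requires sorting by component first. A self-contained sketch with toy data (the commit nodes and component names are made up for illustration):

import itertools

toy_commits = [
    {"node": "aaa", "component": "Core::DOM"},
    {"node": "bbb", "component": "Core::Graphics"},
    {"node": "ccc", "component": "Core::DOM"},
]

# Without this sort, groupby would emit "Core::DOM" twice.
toy_commits.sort(key=lambda commit: commit["component"])

for component, group in itertools.groupby(toy_commits,
                                          key=lambda commit: commit["component"]):
    print(component, [commit["node"] for commit in group])
# Core::DOM ['aaa', 'ccc']
# Core::Graphics ['bbb']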
Example 2
    def evaluation(self):
        # Get a test set of pushes on which to test the model.
        pushes, train_push_len = self.get_pushes(False)

        # To evaluate the model with reductions enabled, we need to regenerate the failing together DB, using
        # only failure data from the training pushes (otherwise, we'd leak training information into the test
        # set).
        if self.granularity == "label":
            print(
                "Generate failing together DB (restricted to training pushes)")
            push_data, _ = test_scheduling.get_push_data("label")
            test_scheduling.generate_failing_together_probabilities(
                push_data, pushes[train_push_len - 1]["revs"][0])

        test_pushes = pushes[train_push_len:]

        all_tasks = reduce(
            lambda x, y: x | y,
            (set(push["failures"]) | set(push["passes"])
             for push in test_pushes[-28:]),
        )

        test_pushes_failures = sum(1 for push in test_pushes
                                   if len(push["failures"]) > 0)

        test_pushes = {push["revs"][0]: push for push in test_pushes}

        print(
            f"Testing on {len(test_pushes)} ({test_pushes_failures} with failures) out of {len(pushes)}. {len(all_tasks)} schedulable tasks."
        )

        commit_map = get_commit_map()

        past_failures_data = test_scheduling.get_past_failures(
            self.granularity)
        last_push_num = past_failures_data["push_num"]
        past_failures_data.close()

        # Select tests for all the pushes in the test set.
        for i, (rev, push) in enumerate(tqdm(test_pushes.items())):
            commits = tuple(commit_map[revision] for revision in push["revs"]
                            if revision in commit_map)
            if len(commits) == 0:
                test_pushes[rev]["all_possibly_selected"] = {}
                continue

            push_num = last_push_num - (len(test_pushes) - (i + 1))

            # Note: we subtract 100 from the push number to make sure we don't use
            # past failure data for the push itself.
            # The number 100 comes from the fact that in the past failure data
            # generation we store past failures in batches of 100 pushes.
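            # For example (hypothetical numbers): with last_push_num = 3000 and 50 test
            # pushes, the 10th test push gets push_num = 3000 - (50 - 10) = 2960, and we
            # query past failure data as of push 2860.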
            test_pushes[rev]["all_possibly_selected"] = self.select_tests(
                commits, 0.3, push_num - 100)

        reductions = [None]
        if self.granularity == "label":
            reductions += [0.9, 1.0]

        def do_eval(confidence_threshold, reduction, cap, minimum):
            for rev, push in test_pushes.items():
                selected = set(name for name, confidence in
                               push["all_possibly_selected"].items()
                               if confidence >= confidence_threshold)

                if minimum is not None and len(selected) < minimum:
                    remaining = [(name, confidence) for name, confidence in
                                 push["all_possibly_selected"].items()
                                 if name not in selected]
                    selected.update(name for name, _ in sorted(
                        remaining, key=lambda x: -x[1])[:minimum -
                                                        len(selected)])

                if reduction is not None:
                    selected = self.reduce(selected, reduction)

                if cap is not None and len(selected) > cap:
                    selected = set(
                        name for name, _ in sorted(
                            ((name, confidence) for name, confidence in
                             push["all_possibly_selected"].items()
                             if name in selected),
                            key=lambda x: x[1],
                            reverse=True,
                        )[:cap])

                caught = selected & set(push["failures"])

                push["number_scheduled"] = len(selected)
                push["caught_one"] = (len(caught) > 0
                                      if len(push["failures"]) != 0 else None)
                push["some_didnt_run"] = (not selected.issubset(
                    set(push["passes"]) | set(push["failures"])), )
                push["caught_percentage"] = (len(caught) /
                                             len(push["failures"])
                                             if len(push["failures"]) != 0 else
                                             None)

            min_scheduled = min(result["number_scheduled"]
                                for result in test_pushes.values())
            max_scheduled = max(result["number_scheduled"]
                                for result in test_pushes.values())
            average_scheduled = statistics.mean(
                result["number_scheduled"] for result in test_pushes.values())
            num_failing_pushes = sum(1 for result in test_pushes.values()
                                     if result["caught_one"] is not None)
            num_caught_one = sum(1 for result in test_pushes.values()
                                 if result["caught_one"])
            num_caught_one_or_some_didnt_run = sum(
                1 for result in test_pushes.values()
                if result["caught_one"] or (result["caught_one"] is not None
                                            and result["some_didnt_run"]))
            percentage_caught_one = 100 * num_caught_one / num_failing_pushes
            percentage_caught_one_or_some_didnt_run = (
                100 * num_caught_one_or_some_didnt_run / num_failing_pushes)
            average_caught_percentage = 100 * statistics.mean(
                result["caught_percentage"] for result in test_pushes.values()
                if result["caught_percentage"] is not None)

            reduction_str = (f"enabled at {reduction * 100}%"
                             if reduction is not None else "disabled")

            print(
                f"For confidence threshold {confidence_threshold}, with reduction {reduction_str}, and cap at {cap}: scheduled {average_scheduled} tasks on average (min {min_scheduled}, max {max_scheduled}). In {percentage_caught_one}% of pushes we caught at least one failure ({percentage_caught_one_or_some_didnt_run}% ignoring misses when some of our selected tasks didn't run). On average, we caught {average_caught_percentage}% of all seen failures."
            )

        for minimum in [None, 10]:
            for cap in [None, 300, 500]:
                for reduction in reductions:
                    for confidence_threshold in [
                            0.5, 0.7, 0.8, 0.85, 0.9, 0.95
                    ]:
                        do_eval(confidence_threshold, reduction, cap, minimum)
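
The heart of do_eval above is the order in which the selection knobs are applied: confidence threshold first, then a top-up to the minimum, then reduction, then the cap. A self-contained sketch of that ordering with toy data (reduction is left out because self.reduce is model-specific and not shown in this listing):

from typing import Dict, Optional, Set


def apply_selection_knobs(
    confidences: Dict[str, float],
    threshold: float,
    minimum: Optional[int] = None,
    cap: Optional[int] = None,
) -> Set[str]:
    # 1. Keep every task at or above the confidence threshold.
    selected = {name for name, conf in confidences.items() if conf >= threshold}

    # 2. Top up to the minimum with the highest-confidence remaining tasks.
    if minimum is not None and len(selected) < minimum:
        remaining = sorted(
            ((name, conf) for name, conf in confidences.items() if name not in selected),
            key=lambda x: -x[1],
        )
        selected.update(name for name, _ in remaining[:minimum - len(selected)])

    # 3. (Reduction would be applied to `selected` here.)

    # 4. Cap by keeping only the highest-confidence selected tasks.
    if cap is not None and len(selected) > cap:
        selected = set(
            sorted(selected, key=lambda name: confidences[name], reverse=True)[:cap])

    return selected


# With threshold 0.8, minimum 2 and cap 2 on three hypothetical tasks:
print(apply_selection_knobs({"t1": 0.9, "t2": 0.6, "t3": 0.3}, 0.8, minimum=2, cap=2))
# -> {'t1', 't2'} (set order may vary)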
    def generate_test_scheduling_history(self, granularity: str,
                                         training_months: int) -> None:
        if granularity != "config_group":
            # Get the commits DB.
            assert db.download(repository.COMMITS_DB)

        HISTORY_DATE_START = datetime.now() - relativedelta(
            months=training_months)

        if granularity == "label":
            test_scheduling_db = test_scheduling.TEST_LABEL_SCHEDULING_DB
            past_failures_db = os.path.join(
                "data", test_scheduling.PAST_FAILURES_LABEL_DB)
            failing_together_db = os.path.join(
                "data", test_scheduling.FAILING_TOGETHER_LABEL_DB)
        elif granularity == "group":
            test_scheduling_db = test_scheduling.TEST_GROUP_SCHEDULING_DB
            past_failures_db = os.path.join(
                "data", test_scheduling.PAST_FAILURES_GROUP_DB)
            touched_together_db = os.path.join(
                "data", test_scheduling.TOUCHED_TOGETHER_DB)
        elif granularity == "config_group":
            test_scheduling_db = test_scheduling.TEST_CONFIG_GROUP_SCHEDULING_DB
            past_failures_db = os.path.join(
                "data", test_scheduling.PAST_FAILURES_CONFIG_GROUP_DB)
            failing_together_db = os.path.join(
                "data", test_scheduling.FAILING_TOGETHER_CONFIG_GROUP_DB)

        push_data_iter, push_data_count, all_runnables = test_scheduling.get_push_data(
            granularity)

        if granularity in ("label", "config_group"):
            test_scheduling.generate_failing_together_probabilities(
                granularity, push_data_iter(), push_data_count)

        def generate_all_data() -> Generator[Dict[str, Any], None, None]:
            past_failures = test_scheduling.get_past_failures(
                granularity, False)

            push_num = past_failures[
                "push_num"] if "push_num" in past_failures else 0

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                commit_map[commit_data["node"]] = commit_data

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            if granularity in ("group", "config_group"):
                update_touched_together_gen = (
                    test_scheduling.update_touched_together())
                next(update_touched_together_gen)

            for i, (
                    revisions,
                    fix_revision,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
            ) in enumerate(tqdm(push_data_iter(), total=push_data_count)):
                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                # Skip wptsync commits, since they are not like normal pushes made by developers.
                if any(repository.is_wptsync(commit) for commit in commits):
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We only consider the runnables that ran in this push, plus the possible and
                # likely regressions from this push. We can't consider all runnables because we
                # can't be sure that a task that didn't run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions +
                        likely_regressions))

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync the DB every 250 pushes, so we clean up the shelve cache (we'd run out of memory otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity in ("group", "config_group"):
                    update_touched_together_gen.send(commits[0]["node"])

                result_data = []
                for data in test_scheduling.generate_data(
                        granularity,
                        past_failures,
                        merged_commits,
                        push_num,
                        runnables_to_consider,
                        possible_regressions,
                        likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result_data.append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield {
                        "revs": revisions,
                        "data": result_data,
                    }

            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(
                f"skipped {skipped_no_runnables} (no interesting runnables)")

            past_failures["push_num"] = push_num
            past_failures.close()

        # For the config/group granularity, we are only interested in the failing together DB.
        if granularity != "config_group":
            db.append(test_scheduling_db, generate_all_data())

            zstd_compress(test_scheduling_db)
            create_tar_zst(past_failures_db)

        if granularity == "group":
            create_tar_zst(touched_together_db)

        if granularity in ("label", "config_group"):
            create_tar_zst(failing_together_db)
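
update_touched_together above is driven as a send()-based coroutine: it is primed with next(), fed one revision per push via send(), and finalized by sending None, which makes the generator return (hence the StopIteration guard). A toy, self-contained version of that protocol (the node strings are placeholders):

def toy_touched_together():
    count = 0
    while True:
        node = yield
        if node is None:
            break
        count += 1
        print(f"updated touched-together data up to {node}")
    print(f"done after {count} nodes")


gen = toy_touched_together()
next(gen)           # prime the coroutine so it reaches the first yield
gen.send("abc123")
gen.send("def456")
try:
    gen.send(None)  # signal completion; the generator returns, raising StopIteration
except StopIteration:
    pass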
    def go(self,
           bugs: List[int],
           meta_bugs: Optional[List[int]] = None) -> None:
        if meta_bugs is not None:
            bugs += meta_bugs + self.get_blocking_of(meta_bugs)

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(bugs)

        component_team_mapping = bugzilla.get_component_team_mapping()

        bugs_set = set(bugs)

        commits = [
            commit for commit in repository.get_commits()
            if commit["bug_id"] in bugs_set
        ]
        commit_map = {commit["node"]: commit for commit in commits}
        hash_to_rev = {commit["node"]: i for i, commit in enumerate(commits)}

        logger.info(f"{len(commits)} commits to analyze.")

        logger.info(f"{len(bugs_set)} bugs to analyze.")

        bug_map = {}
        regressor_bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_map[bug["id"]] = bug

            if len(bug["regressions"]) > 0:
                regressor_bug_ids.add(bug["id"])

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        blocker_to_meta = collections.defaultdict(set)
        if meta_bugs is not None:
            for meta_bug in meta_bugs:
                if meta_bug not in bug_map:
                    continue

                for blocker_bug_id in bugzilla.find_blocking(
                        bug_map, bug_map[meta_bug]):
                    blocker_to_meta[blocker_bug_id].add(meta_bug)

        def _download_past_bugs(url: str) -> dict:
            path = os.path.join("data", os.path.basename(url)[:-4])
            download_check_etag(url, path=f"{path}.zst")
            zstd_decompress(path)
            assert os.path.exists(path)
            with open(path, "r") as f:
                return json.load(f)

        past_regressions_by = {}
        past_fixed_bugs_by = {}
        past_regression_blocked_bugs_by = {}
        past_fixed_bug_blocked_bugs_by = {}

        for dimension in ["component", "directory", "file", "function"]:
            past_regressions_by[dimension] = _download_past_bugs(
                PAST_REGRESSIONS_BY_URL.format(dimension=dimension))
            past_fixed_bugs_by[dimension] = _download_past_bugs(
                PAST_FIXED_BUGS_BY_URL.format(dimension=dimension))
            past_regression_blocked_bugs_by[dimension] = _download_past_bugs(
                PAST_REGRESSION_BLOCKED_BUGS_BY_URL.format(
                    dimension=dimension))
            past_fixed_bug_blocked_bugs_by[dimension] = _download_past_bugs(
                PAST_FIXED_BUG_BLOCKED_BUGS_BY_URL.format(dimension=dimension))

        path_to_component = repository.get_component_mapping()

        def get_full_component(bug):
            return "{}::{}".format(bug["product"], bug["component"])

        def histogram(components: List[str]) -> Dict[str, float]:
            counter = collections.Counter(components)
            return {
                component: count / len(components)
                for component, count in counter.most_common()
            }
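        # For example (hypothetical input), histogram(["Core::DOM", "Core::DOM", "Core::Graphics"])
        # returns {"Core::DOM": 0.666..., "Core::Graphics": 0.333...}, i.e. the share of each
        # component among the given bugs.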

        def component_histogram(bugs: List[dict]) -> Dict[str, float]:
            return histogram([bug["component"] for bug in bugs])

        def find_risk_band(risk: float) -> str:
            for name, start, end in self.risk_bands:
                if start <= risk <= end:
                    return name

            assert False

        def get_prev_bugs(past_bugs_by: dict,
                          commit: repository.CommitDict,
                          component: str = None) -> List[dict]:
            paths = [
                path for path in commit["files"]
                if component is None or (path.encode(
                    "utf-8") in path_to_component and path_to_component[
                        path.encode("utf-8")] == component.encode("utf-8"))
            ]
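
            # Look up past bugs at progressively coarser granularities: first by the
            # functions touched in each path, then by file, then by directory, and
            # finally by the path's component. A path that matches at one level is
            # removed from the coarser lookups that follow.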

            past_bugs = []

            for path, f_group in commit["functions"].items():
                if path not in paths:
                    continue

                if path not in past_bugs_by["function"]:
                    continue

                found = False
                for f in f_group:
                    if f[0] not in past_bugs_by["function"][path]:
                        continue

                    found = True
                    past_bugs += past_bugs_by["function"][path][f[0]]

                if found:
                    paths.remove(path)

            for path in paths[:]:
                if path in past_bugs_by["file"]:
                    past_bugs += past_bugs_by["file"][path]
                    paths.remove(path)

            for path, directories in list(
                    zip(paths, repository.get_directories(paths))):
                found = False
                for directory in directories:
                    if directory in past_bugs_by["directory"]:
                        found = True
                        past_bugs += past_bugs_by["directory"][directory]

                if found:
                    paths.remove(path)

            components = [
                path_to_component[path.encode("utf-8")].tobytes().decode(
                    "utf-8") for path in paths
                if path.encode("utf-8") in path_to_component
            ]

            for component in components:
                if component in past_bugs_by["component"]:
                    past_bugs += past_bugs_by["component"][component]

            return past_bugs

        def get_prev_bugs_stats(
            commit_group: dict,
            commit_list: List[repository.CommitDict],
            component: str = None,
        ) -> None:
            # Find previous regressions that occurred in the same files as those touched by these commits,
            # previous bugs that were fixed by touching the same files as these commits,
            # previous bugs that were blocked by regressions that occurred in the same files as those touched by these commits,
            # and previous bugs that were blocked by bugs that were fixed by touching the same files as those touched by these commits.
            prev_regressions: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_regressions_by, commit, component)
                 for commit in commit_list),
                [],
            )
            prev_fixed_bugs: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_fixed_bugs_by, commit, component)
                 for commit in commit_list),
                [],
            )
            prev_regression_blocked_bugs: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_regression_blocked_bugs_by, commit,
                               component) for commit in commit_list),
                [],
            )
            prev_fixed_bug_blocked_bugs: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_fixed_bug_blocked_bugs_by, commit,
                               component) for commit in commit_list),
                [],
            )

            prev_regressions = _deduplicate(prev_regressions)
            prev_fixed_bugs = _deduplicate(prev_fixed_bugs)
            prev_regression_blocked_bugs = _deduplicate(
                prev_regression_blocked_bugs)
            prev_fixed_bug_blocked_bugs = _deduplicate(
                prev_fixed_bug_blocked_bugs)

            regression_components = component_histogram(prev_regressions)
            fixed_bugs_components = component_histogram(prev_fixed_bugs)
            regression_blocked_bug_components = component_histogram(
                prev_regression_blocked_bugs)
            fixed_bug_blocked_bug_components = component_histogram(
                prev_fixed_bug_blocked_bugs)

            commit_group["most_common_regression_components"] = regression_components
            # These are only used for component connections for the time being.
            if component:
                commit_group["prev_regressions"] = prev_regressions[-3:]
                commit_group["prev_fixed_bugs"] = prev_fixed_bugs[-3:]
                commit_group["prev_regression_blocked_bugs"] = prev_regression_blocked_bugs[-3:]
                commit_group["prev_fixed_bug_blocked_bugs"] = prev_fixed_bug_blocked_bugs[-3:]
                commit_group["most_common_fixed_bugs_components"] = fixed_bugs_components
                commit_group["most_common_regression_blocked_bug_components"] = regression_blocked_bug_components
                commit_group["most_common_fixed_bug_blocked_bug_components"] = fixed_bug_blocked_bug_components

        def get_commit_data(
                commit_list: List[repository.CommitDict]) -> List[dict]:
            if len(commit_list) == 0:
                return []

            # Evaluate the risk of the commits associated with this bug.
            probs = self.regressor_model.classify(commit_list,
                                                  probabilities=True)
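            # probs[i] is assumed to be a [not-regressor, regressor] probability pair,
            # so probs[i][1] below is used as the commit's risk score.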

            commits_data = []
            for i, commit in enumerate(commit_list):
                revision_id = repository.get_revision_id(commit)
                if revision_id in revision_map:
                    testing = phabricator.get_testing_project(
                        revision_map[revision_id])

                    if testing is None:
                        testing = "missing"
                else:
                    testing = None

                commits_data.append({
                    "id": commit["node"],
                    "testing": testing,
                    "risk": float(probs[i][1]),
                    "backedout": bool(commit["backedoutby"]),
                    "author": commit["author_email"],
                    "reviewers": commit["reviewers"],
                    "coverage": [
                        commit["cov_added"],
                        commit["cov_covered"],
                        commit["cov_unknown"],
                    ],
                })

            return commits_data

        # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
        commits.sort(key=lambda x: x["bug_id"])

        bug_to_commits = {}
        for bug_id, commit_iter in itertools.groupby(commits,
                                                     lambda x: x["bug_id"]):
            # TODO: Figure out what to do with bugs we couldn't download (security bugs).
            if bug_id not in bug_map:
                continue

            bug_to_commits[bug_id] = sorted(
                commit_iter, key=lambda x: hash_to_rev[x["node"]])

        bug_summaries = []
        for bug_id in bugs:
            if bug_id not in bug_map:
                continue

            commit_list = bug_to_commits.get(bug_id, [])
            commit_data = get_commit_data(commit_list)

            bug = bug_map[bug_id]

            bug_summary = {
                "id": bug_id,
                "regressor": bug_id in regressor_bug_ids,
                "regression": len(bug["regressed_by"]) > 0
                or any(keyword in bug["keywords"]
                       for keyword in ["regression", "talos-regression"])
                or ("cf_has_regression_range" in bug
                    and bug["cf_has_regression_range"] == "yes"),
                "whiteboard": bug["whiteboard"],
                "assignee": bug["assigned_to"]
                if bug["assigned_to"] != "*****@*****.**" else None,
                "versions": bugzilla.get_fixed_versions(bug),
                "component": get_full_component(bug),
                "team": bugzilla.component_to_team(component_team_mapping,
                                                   bug["product"],
                                                   bug["component"]),
                "summary": bug["summary"],
                "types": bug_to_types(bug),
                "severity": bug["severity"],
                "creation_date": dateutil.parser.parse(
                    bug["creation_time"]).strftime("%Y-%m-%d"),
                "date": max(
                    dateutil.parser.parse(commit["pushdate"])
                    for commit in commit_list).strftime("%Y-%m-%d")
                if len(commit_list) > 0 else None,
                "commits": commit_data,
                "meta_ids": list(blocker_to_meta[bug_id]),
                "risk_band": find_risk_band(
                    max(commit["risk"] for commit in commit_data))
                if len(commit_data) > 0 else None,
            }

            get_prev_bugs_stats(bug_summary, commit_list)

            bug_summaries.append(bug_summary)

        landings_by_date = collections.defaultdict(list)
        for bug_summary in bug_summaries:
            landings_by_date[bug_summary["creation_date"]].append(bug_summary)

        with open("landings_by_date.json", "w") as f:
            output: dict = {
                "summaries": landings_by_date,
            }
            if meta_bugs is not None:
                output["featureMetaBugs"] = [{
                    "id":
                    meta_bug,
                    "summary":
                    bug_map[meta_bug]["summary"]
                } for meta_bug in meta_bugs]

            json.dump(output, f)

        # Retrieve components of test failures that occurred when landing patches to fix bugs in specific components.
        component_failures = collections.defaultdict(list)

        push_data_iter, push_data_count, all_runnables = test_scheduling.get_push_data(
            "group")

        for revisions, _, _, possible_regressions, likely_regressions in tqdm(
                push_data_iter(), total=push_data_count):
            commit_list = [
                commit_map[revision] for revision in revisions
                if revision in commit_map
            ]
            if len(commit_list) == 0:
                continue

            commit_bugs = [
                bug_map[commit["bug_id"]] for commit in commit_list
                if commit["bug_id"] in bug_map
            ]

            components = list(
                set(get_full_component(bug) for bug in commit_bugs))

            groups = [
                group for group in list(
                    set(possible_regressions + likely_regressions))
                if group.encode("utf-8") in path_to_component
            ]

            for group in groups:
                for component in components:
                    component_failures[component].append(path_to_component[
                        group.encode("utf-8")].tobytes().decode("utf-8"))

        # Filter out commits for which we have no bugs.
        commits = [commit for commit in commits if commit["bug_id"] in bug_map]

        # Sort commits by bug component, so we can use itertools.groupby to group them by bug component.
        commits.sort(key=lambda x: get_full_component(bug_map[x["bug_id"]]))

        commit_groups = []
        for component, commit_iter in itertools.groupby(
                commits, lambda x: get_full_component(bug_map[x["bug_id"]])):
            commit_group = {
                "component": component,
                "most_common_test_failure_components":
                histogram(component_failures[component])
                if component in component_failures else {},
            }
            get_prev_bugs_stats(commit_group, list(commit_iter), component)
            commit_groups.append(commit_group)

        with open("component_connections.json", "w") as f:
            json.dump(commit_groups, f)

        repository.close_component_mapping()
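
find_risk_band above expects self.risk_bands to be an iterable of (name, start, end) tuples and returns the first band whose range contains the risk value. A self-contained sketch with made-up band boundaries (the real ones come from the model's configuration, which is not part of this listing):

RISK_BANDS = [
    ("LOW", 0.0, 0.33),
    ("MEDIUM", 0.33, 0.66),
    ("HIGH", 0.66, 1.0),
]


def find_risk_band(risk: float) -> str:
    # Return the first band whose [start, end] range contains the risk value.
    for name, start, end in RISK_BANDS:
        if start <= risk <= end:
            return name
    raise ValueError(f"risk {risk} falls outside all bands")


print(find_risk_band(0.72))  # -> HIGH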
Example 5
    def evaluation(self) -> None:
        # Get a test set of pushes on which to test the model.
        pushes, train_push_len = self.get_pushes(False)

        # To evaluate the model with reductions enabled, we need to regenerate the failing together DB, using
        # only failure data from the training pushes (otherwise, we'd leak training information into the test
        # set).
        print("Generate failing together DB (restricted to training pushes)")
        push_data_iter, push_data_count, _ = test_scheduling.get_push_data(
            "label" if self.granularity == "label" else "config_group"
        )
        test_scheduling.generate_failing_together_probabilities(
            "label" if self.granularity == "label" else "config_group",
            push_data_iter(),
            push_data_count,
            pushes[train_push_len - 1]["revs"][0],
        )

        test_pushes_list = pushes[train_push_len:]

        all_tasks = reduce(
            lambda x, y: x | y,
            (
                set(push["failures"]) | set(push["passes"])
                for push in test_pushes_list[-28:]
            ),
        )

        all_revs = set(sum((push["revs"] for push in test_pushes_list), []))

        test_pushes_failures = sum(
            1 for push in test_pushes_list if len(push["failures"]) > 0
        )

        test_pushes = {push["revs"][0]: push for push in test_pushes_list}

        if self.granularity == "group":
            for (
                revisions,
                fix_revision,
                push_runnables,
                possible_regressions,
                likely_regressions,
            ) in tqdm(push_data_iter(), total=push_data_count):
                if revisions[0] not in test_pushes:
                    continue

                test_pushes[revisions[0]]["config_group_failures"] = (
                    possible_regressions + likely_regressions
                )

        print(
            f"Testing on {len(test_pushes)} ({test_pushes_failures} with failures) out of {len(pushes)}. {len(all_tasks)} schedulable tasks."
        )

        del pushes

        commit_map = get_commit_map(all_revs)

        past_failures_data = test_scheduling.get_past_failures(self.granularity, True)
        last_push_num = past_failures_data["push_num"]
        past_failures_data.close()

        # Select tests for all the pushes in the test set.
        for i, push in enumerate(tqdm(test_pushes.values())):
            commits = tuple(
                commit_map.pop(revision)
                for revision in push["revs"]
                if revision in commit_map
            )
            if len(commits) == 0:
                push["all_possibly_selected"] = {}
                continue

            push_num = last_push_num - (len(test_pushes) - (i + 1))

            # Note: we subtract 100 from the push number to make sure we don't use
            # past failure data for the push itself.
            # The number 100 comes from the fact that in the past failure data
            # generation we store past failures in batches of 100 pushes.
            push["all_possibly_selected"] = self.select_tests(
                commits, 0.5, push_num - 100
            )

        def do_eval(
            executor: concurrent.futures.ProcessPoolExecutor,
            confidence_threshold: float,
            reduction: Optional[float],
            cap: Optional[int],
            minimum: Optional[int],
        ) -> None:
            futures: Dict[concurrent.futures.Future, Dict[str, Any]] = {}
            for push in test_pushes.values():
                futures[
                    executor.submit(
                        eval_apply_transforms,
                        self,
                        push,
                        confidence_threshold,
                        reduction,
                        cap,
                        minimum,
                    )
                ] = push

            for future in concurrent.futures.as_completed(futures):
                exc = future.exception()
                if exc is not None:
                    print(
                        "Exception {} while running {}".format(
                            exc, futures[future]["revs"][0]
                        )
                    )
                    for f in futures:
                        f.cancel()

                push = futures[future]
                selected, group_configs = future.result()

                if reduction is not None and self.granularity == "group":
                    push["number_configs"] = len(
                        set(
                            sum(
                                group_configs.values(),
                                [],
                            )
                        )
                    )
                    selected_config_groups = set(
                        (config, group)
                        for group, configs in group_configs.items()
                        for config in configs
                    )
                    caught_config_groups = selected_config_groups & set(
                        push["config_group_failures"]
                    )
                    push["caught_one_config_group"] = (
                        len(caught_config_groups) > 0
                        if len(push["config_group_failures"]) != 0
                        else None
                    )
                    push["caught_percentage_config_group"] = (
                        len(caught_config_groups) / len(push["config_group_failures"])
                        if len(push["config_group_failures"]) != 0
                        else None
                    )

                caught = selected & set(push["failures"])

                push["number_scheduled"] = len(selected)
                push["caught_one"] = (
                    len(caught) > 0 if len(push["failures"]) != 0 else None
                )
                push["some_didnt_run"] = (
                    not selected.issubset(set(push["passes"]) | set(push["failures"])),
                )
                push["caught_percentage"] = (
                    len(caught) / len(push["failures"])
                    if len(push["failures"]) != 0
                    else None
                )

            min_scheduled = min(
                result["number_scheduled"] for result in test_pushes.values()
            )
            max_scheduled = max(
                result["number_scheduled"] for result in test_pushes.values()
            )
            average_scheduled = statistics.mean(
                result["number_scheduled"] for result in test_pushes.values()
            )
            num_failing_pushes = sum(
                1 for result in test_pushes.values() if result["caught_one"] is not None
            )
            num_caught_one = sum(
                1 for result in test_pushes.values() if result["caught_one"]
            )
            num_caught_one_or_some_didnt_run = sum(
                1
                for result in test_pushes.values()
                if result["caught_one"]
                or (result["caught_one"] is not None and result["some_didnt_run"])
            )
            percentage_caught_one = 100 * num_caught_one / num_failing_pushes
            percentage_caught_one_or_some_didnt_run = (
                100 * num_caught_one_or_some_didnt_run / num_failing_pushes
            )
            average_caught_percentage = 100 * statistics.mean(
                result["caught_percentage"]
                for result in test_pushes.values()
                if result["caught_percentage"] is not None
            )

            reduction_str = (
                f"enabled at {reduction * 100}%"
                if reduction is not None
                else "disabled"
            )

            message = f"For confidence threshold {confidence_threshold}, with reduction {reduction_str}, cap at {cap}, and minimum at {minimum}: scheduled {average_scheduled} tasks on average (min {min_scheduled}, max {max_scheduled}). In {percentage_caught_one}% of pushes we caught at least one failure ({percentage_caught_one_or_some_didnt_run}% ignoring misses when some of our selected tasks didn't run). On average, we caught {average_caught_percentage}% of all seen failures."

            if reduction is not None and self.granularity == "group":
                average_configs = statistics.mean(
                    result["number_configs"] for result in test_pushes.values()
                )
                median_configs = statistics.median(
                    result["number_configs"] for result in test_pushes.values()
                )
                message += f" On average, we selected {average_configs} configs (a median of {median_configs} configs)."

                num_caught_one_config_group = sum(
                    1
                    for result in test_pushes.values()
                    if result["caught_one_config_group"]
                )
                percentage_caught_one_config_group = (
                    100 * num_caught_one_config_group / num_failing_pushes
                )
                average_caught_percentage_config_group = 100 * statistics.mean(
                    result["caught_percentage_config_group"]
                    for result in test_pushes.values()
                    if result["caught_percentage_config_group"] is not None
                )

                message += f" In {percentage_caught_one_config_group}% of pushes we caught at least one config/group failure. On average, we caught {average_caught_percentage_config_group}% of all seen config/group failures."

            print(message)

        with concurrent.futures.ProcessPoolExecutor(
            max_workers=utils.get_physical_cpu_count()
        ) as executor:
            scenarios = [
                (None, None, None),
                (10, None, None),
                (None, 300, None),
                (None, None, 0.9),
                (None, None, 1.0),
            ]
            for minimum, cap, reduction in scenarios:
                # Pre-generate equivalence sets, so when we run the config selection in multiple processes
                # we don't risk concurrent writes to the equivalence sets file.
                if reduction is not None and self.granularity == "group":
                    self._get_equivalence_sets(reduction)

                for confidence_threshold in [0.5, 0.7, 0.8, 0.85, 0.9, 0.95]:
                    do_eval(executor, confidence_threshold, reduction, cap, minimum)
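
A self-contained sketch (toy workload) of the executor pattern used by do_eval above: submit one job per push, consume results with as_completed(), and cancel the remaining futures if any job raised. The toy_job function and revision strings are placeholders, not taken from this listing:

import concurrent.futures


def toy_job(rev: str) -> int:
    # Stand-in for eval_apply_transforms; just returns the revision length.
    return len(rev)


if __name__ == "__main__":
    revs = ["abc123", "def4567", "0011223344"]
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
        futures = {executor.submit(toy_job, rev): rev for rev in revs}
        for future in concurrent.futures.as_completed(futures):
            exc = future.exception()
            if exc is not None:
                print("Exception {} while running {}".format(exc, futures[future]))
                for f in futures:
                    f.cancel()
                continue
            print(futures[future], future.result())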