def get_commit_data(commit_list: List[repository.CommitDict]) -> List[dict]:
    """Summarize each commit in the group: risk score, testing tag,
    backout status, author/reviewers and coverage numbers."""
    if not commit_list:
        return []

    # Evaluate risk of commits associated to this bug (one batched call).
    probs = self.regressor_model.classify(commit_list, probabilities=True)

    commits_data = []
    for prob, commit in zip(probs, commit_list):
        # Resolve the Phabricator testing project for the commit's revision.
        # None -> commit has no known revision; "missing" -> revision exists
        # but carries no testing project.
        revision = revision_map.get(repository.get_revision_id(commit))
        if revision is None:
            testing = None
        else:
            testing = phabricator.get_testing_project(revision)
            if testing is None:
                testing = "missing"

        commits_data.append(
            {
                "id": commit["node"],
                "testing": testing,
                "risk": float(prob[1]),
                "backedout": bool(commit["backedoutby"]),
                "author": commit["author_email"],
                "reviewers": commit["reviewers"],
                "coverage": [
                    commit["cov_added"],
                    commit["cov_covered"],
                    commit["cov_unknown"],
                ],
            }
        )

    return commits_data
Example #2
0
 def list_testing_projects(
     commits: Iterable[repository.CommitDict], ) -> Collection[str]:
     """Collect the testing-project tags of the given commits, dropping
     falsy ones (None and empty strings), mirroring filter(None, ...)."""
     projects = []
     for commit in commits:
         tag = phabricator.get_testing_project(
             revision_map[repository.get_revision_id(commit)])
         if tag:
             projects.append(tag)
     return projects
Example #3
0
    def go(self,
           bugs: List[int],
           meta_bugs: Optional[List[int]] = None) -> None:
        """Analyze the landed commits of the given bugs and dump a per-day
        summary to ``landings_by_date.json``.

        For each bug we gather its commits, look up their Phabricator
        revisions and testing tags, evaluate regression risk with the
        regressor model, and relate the touched files to past
        regressions/fixes.

        Args:
            bugs: IDs of the bugs whose commits should be analyzed.
            meta_bugs: optional meta-bug IDs; the bugs they block are
                analyzed too and linked back to their meta bugs in the
                output.
        """
        if meta_bugs is not None:
            # Rebind instead of += so the caller's list is not mutated.
            bugs = bugs + meta_bugs + self.get_blocking_of(meta_bugs)

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(bugs)

        bugs_set = set(bugs)

        commits = [
            commit for commit in repository.get_commits()
            if commit["bug_id"] in bugs_set
        ]
        # Original (push) order of each commit, used to sort within a bug.
        hash_to_rev = {commit["node"]: i for i, commit in enumerate(commits)}

        logger.info(f"{len(commits)} commits to analyze.")

        bug_ids = {commit["bug_id"] for commit in commits}

        logger.info(f"{len(bug_ids)} bugs to analyze.")

        bug_map = {}
        regressor_bug_ids = set()
        for bug in bugzilla.get_bugs():
            if bug["id"] in bugs_set:
                bug_map[bug["id"]] = bug

            if len(bug["regressions"]) > 0:
                regressor_bug_ids.add(bug["id"])

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        # Map each blocker bug to the meta bugs it blocks. Always defined
        # (previously it was only created when meta_bugs was not None, so
        # the unconditional lookup below raised NameError otherwise).
        blocker_to_meta = collections.defaultdict(set)
        if meta_bugs is not None:
            for meta_bug in meta_bugs:
                if meta_bug not in bug_map:
                    continue

                for blocker_bug_id in bugzilla.find_blocking(
                        bug_map, bug_map[meta_bug]):
                    blocker_to_meta[blocker_bug_id].add(meta_bug)

        # TODO: Use past regressions by function information too (maybe first by function and if no results by file? or prioritize function and recentness?)

        def _download_past_bugs(url: str) -> dict:
            """Download a zstd-compressed past-bugs artifact (if outdated),
            decompress it and return its parsed JSON content."""
            # basename ends in ".zst"; strip it to get the decompressed path.
            path = os.path.join("data", os.path.basename(url)[:-4])
            download_check_etag(url, path=f"{path}.zst")
            zstd_decompress(path)
            assert os.path.exists(path)
            with open(path, "r") as f:
                return json.load(f)

        past_regressions_by_file = _download_past_bugs(
            PAST_REGRESSIONS_BY_FILE_URL)
        past_fixed_bugs_by_file = _download_past_bugs(
            PAST_FIXED_BUGS_BY_FILE_URL)
        past_regression_blocked_bugs_by_file = _download_past_bugs(
            PAST_REGRESSION_BLOCKED_BUGS_BY_FILE_URL)
        past_fixed_bug_blocked_bugs_by_file = _download_past_bugs(
            PAST_FIXED_BUG_BLOCKED_BUGS_BY_FILE_URL)

        def component_histogram(bugs: List[dict]) -> Dict[str, float]:
            """Return component -> relative frequency for the given bugs,
            most common first (empty dict for an empty list)."""
            counter = collections.Counter(bug["component"] for bug in bugs)
            return {
                component: count / len(bugs)
                for component, count in counter.most_common()
            }

        # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
        commits.sort(key=lambda x: x["bug_id"])

        commit_groups = []
        for bug_id, commit_iter in itertools.groupby(commits,
                                                     lambda x: x["bug_id"]):
            # TODO: Figure out what to do with bugs we couldn't download (security bugs).
            if bug_id not in bug_map:
                continue

            commit_list = list(commit_iter)
            commit_list.sort(key=lambda x: hash_to_rev[x["node"]])

            # Find previous regressions occurred in the same files as those touched by these commits.
            # And find previous bugs that were fixed by touching the same files as these commits.
            # And find previous bugs that were blocked by regressions occurred in the same files as those touched by these commits.
            # And find previous bugs that were blocked by bugs that were fixed by touching the same files as those touched by these commits.
            prev_regressions: List[Dict[str, Any]] = []
            prev_fixed_bugs: List[Dict[str, Any]] = []
            prev_regression_blocked_bugs: List[Dict[str, Any]] = []
            prev_fixed_bug_blocked_bugs: List[Dict[str, Any]] = []
            for commit in commit_list:
                for path in commit["files"]:
                    if path in past_regressions_by_file:
                        prev_regressions.extend(past_regressions_by_file[path])

                    if path in past_fixed_bugs_by_file:
                        prev_fixed_bugs.extend(past_fixed_bugs_by_file[path])

                    if path in past_regression_blocked_bugs_by_file:
                        prev_regression_blocked_bugs.extend(
                            past_regression_blocked_bugs_by_file[path])

                    if path in past_fixed_bug_blocked_bugs_by_file:
                        prev_fixed_bug_blocked_bugs.extend(
                            past_fixed_bug_blocked_bugs_by_file[path])

            prev_regressions = _deduplicate(prev_regressions)
            prev_fixed_bugs = _deduplicate(prev_fixed_bugs)
            prev_regression_blocked_bugs = _deduplicate(
                prev_regression_blocked_bugs)
            prev_fixed_bug_blocked_bugs = _deduplicate(
                prev_fixed_bug_blocked_bugs)

            regression_components = component_histogram(prev_regressions)
            fixed_bugs_components = component_histogram(prev_fixed_bugs)
            regression_blocked_bug_components = component_histogram(
                prev_regression_blocked_bugs)
            fixed_bug_blocked_bug_components = component_histogram(
                prev_fixed_bug_blocked_bugs)

            # Evaluate risk of commits associated to this bug.
            probs = self.regressor_model.classify(commit_list,
                                                  probabilities=True)

            commits_data = []
            for i, commit in enumerate(commit_list):
                # None -> no known revision; "none" -> revision with no
                # testing project.
                revision_id = repository.get_revision_id(commit)
                if revision_id in revision_map:
                    testing = phabricator.get_testing_project(
                        revision_map[revision_id])

                    if testing is None:
                        testing = "none"
                else:
                    testing = None

                commits_data.append({
                    "id": commit["node"],
                    "testing": testing,
                    "risk": float(probs[i][1]),
                    "backedout": bool(commit["backedoutby"]),
                    "regressor": commit["bug_id"] in regressor_bug_ids,
                })

            bug = bug_map[bug_id]

            commit_groups.append({
                "id": bug_id,
                "versions": bugzilla.get_fixed_versions(bug),
                "component": "{}::{}".format(bug["product"], bug["component"]),
                "summary": bug["summary"],
                # Day of the latest push among the bug's commits.
                "date": max(
                    dateutil.parser.parse(commit["pushdate"])
                    for commit in commit_list).strftime("%Y-%m-%d"),
                "commits": commits_data,
                "meta_ids": list(blocker_to_meta[bug_id]),
                # Keep only the last three of each "previous bugs" list.
                "prev_regressions": prev_regressions[-3:],
                "prev_fixed_bugs": prev_fixed_bugs[-3:],
                "prev_regression_blocked_bugs": prev_regression_blocked_bugs[-3:],
                "prev_fixed_bug_blocked_bugs": prev_fixed_bug_blocked_bugs[-3:],
                "most_common_regression_components": regression_components,
                "most_common_fixed_bugs_components": fixed_bugs_components,
                "most_common_regression_blocked_bug_components":
                regression_blocked_bug_components,
                "most_common_fixed_bug_blocked_bug_components":
                fixed_bug_blocked_bug_components,
            })

        landings_by_date = collections.defaultdict(list)
        for commit_group in commit_groups:
            landings_by_date[commit_group["date"]].append(commit_group)

        with open("landings_by_date.json", "w") as f:
            output: dict = {
                "landings": landings_by_date,
            }
            if meta_bugs is not None:
                output["featureMetaBugs"] = [{
                    "id": meta_bug,
                    "summary": bug_map[meta_bug]["summary"],
                } for meta_bug in meta_bugs]

            json.dump(output, f)
Example #4
0
    def go(self, days_start: int, days_end: int) -> None:
        """Print the distribution of Phabricator testing tags for commits
        landed in the given day window: overall, for backed-out commits,
        and for commits whose bug caused regressions.

        Args:
            days_start: start of the landing window (days ago).
            days_end: end of the landing window (days ago).
        """
        commits = self.get_landed_since(days_start, days_end)

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(commit["bug_id"] for commit in commits
                               if commit["bug_id"])

        # Filter-out commits with no Phabricator revision linked to them.
        commits = [
            commit for commit in commits
            if repository.get_revision_id(commit) in revision_map
        ]
        logger.info(f"{len(commits)} revisions")

        # Filter-out commits with no testing tags.
        commits = [
            commit for commit in commits if phabricator.get_testing_project(
                revision_map[repository.get_revision_id(commit)]) is not None
        ]
        logger.info(f"{len(commits)} revisions with testing tags")

        def list_testing_projects(
            commits: Iterable[repository.CommitDict], ) -> Collection[str]:
            # Non-falsy testing tags of the given commits (filter(None, ...)
            # drops both None and empty tags).
            return list(
                filter(
                    None,
                    (phabricator.get_testing_project(
                        revision_map[repository.get_revision_id(commit)])
                     for commit in commits),
                ))

        def print_testing_tag_stats(header: str,
                                    testing_projects: Collection[str]) -> None:
            # Print each tag with its percentage share among the given tags,
            # most common first. With no tags, the loop body never runs, so
            # there is no division by zero.
            print(header)
            for testing_project, count in collections.Counter(
                    testing_projects).most_common():
                print(
                    f"{testing_project} - {round(100 * count / len(testing_projects), 1)}%"
                )

        testing_projects = list_testing_projects(commits)
        print_testing_tag_stats(
            f"Most common testing tags (in {len(commits)} revisions):",
            testing_projects,
        )

        backedout_commits = [
            commit for commit in commits if commit["backedoutby"]
        ]
        print_testing_tag_stats(
            f"\nMost common testing tags for backed-out revisions (in {len(backedout_commits)} revisions):",
            list_testing_projects(backedout_commits),
        )

        # Bugs that are known to have caused at least one regression.
        regressor_bug_ids = {
            bug["id"]
            for bug in bugzilla.get_bugs() if len(bug["regressions"]) > 0
        }

        regressor_commits = [
            commit for commit in commits
            if commit["bug_id"] in regressor_bug_ids
        ]
        print_testing_tag_stats(
            f"\nMost common testing tags for revisions which caused regressions (in {len(regressor_commits)} revisions):",
            list_testing_projects(regressor_commits),
        )