Code example #1
        def get_commit_data(commit_list: List[repository.CommitDict]) -> List[dict]:
            if len(commit_list) == 0:
                return []

            # Evaluate the risk of the commits associated with this bug.
            probs = self.regressor_model.classify(commit_list, probabilities=True)

            commits_data = []
            for i, commit in enumerate(commit_list):
                revision_id = repository.get_revision_id(commit)
                if revision_id in revision_map:
                    testing = phabricator.get_testing_project(revision_map[revision_id])

                    if testing is None:
                        testing = "missing"
                else:
                    testing = None

                commits_data.append(
                    {
                        "id": commit["node"],
                        "testing": testing,
                        "risk": float(probs[i][1]),
                        "backedout": bool(commit["backedoutby"]),
                        "author": commit["author_email"],
                        "reviewers": commit["reviewers"],
                        "coverage": [
                            commit["cov_added"],
                            commit["cov_covered"],
                            commit["cov_unknown"],
                        ],
                    }
                )

            return commits_data
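
The snippet above closes over revision_map and self.regressor_model, which come from the enclosing method (see code examples #4 and #5). As a minimal sketch of the assumed context, revision_map maps revision IDs to previously downloaded Phabricator revisions:

        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))
        phabricator.download_revisions(revision_ids)
        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }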
Code example #2
    def retrieve_revisions(self, limit: Optional[int] = None) -> None:
        phabricator.set_api_key(get_secret("PHABRICATOR_URL"),
                                get_secret("PHABRICATOR_TOKEN"))

        db.download(phabricator.REVISIONS_DB)

        # Get the commits DB, as we need it to get the revision IDs linked to recent commits.
        assert db.download(repository.COMMITS_DB)

        # Get the bugs DB, as we need it to get the revision IDs linked to bugs.
        assert db.download(bugzilla.BUGS_DB)

        # Get IDs of revisions linked to commits since a year ago.
        start_date = datetime.utcnow() - relativedelta(years=1)
        revision_ids = list(
            filter(
                None,
                (repository.get_revision_id(commit)
                 for commit in repository.get_commits()
                 if dateutil.parser.parse(commit["pushdate"]) >= start_date),
            ))
        if limit is not None:
            revision_ids = revision_ids[-limit:]

        # Get IDs of revisions linked to bugs since a year ago.
        for bug in bugzilla.get_bugs():
            if (dateutil.parser.parse(bug["creation_time"]).replace(tzinfo=None)
                    < start_date):
                continue

            revision_ids += bugzilla.get_revision_ids(bug)

        phabricator.download_revisions(revision_ids)

        zstd_compress(phabricator.REVISIONS_DB)
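
A hypothetical usage sketch (the class name is illustrative, not from this snippet). Assuming commits are stored in push order, the [-limit:] slice keeps the revisions linked to the most recently landed commits:

        retriever = Retriever()
        # Fetch at most the 500 revisions linked to the most recent commits,
        # plus any revisions linked to bugs filed in the last year.
        retriever.retrieve_revisions(limit=500)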
Code example #3
def list_testing_projects(
    commits: Iterable[repository.CommitDict],
) -> Collection[str]:
    return list(
        filter(
            None,
            (phabricator.get_testing_project(
                revision_map[repository.get_revision_id(commit)])
             for commit in commits),
        ))
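
Example use, mirroring code example #7: tally how often each testing tag occurs. Note the unguarded lookup assumes revision_map has an entry for every commit passed in:

testing_projects = list_testing_projects(commits)
for testing_project, count in collections.Counter(testing_projects).most_common():
    print(f"{testing_project} - {round(100 * count / len(testing_projects), 1)}%")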
Code example #4
    def generate_landings_by_date(
        self,
        bug_map: Dict[int, bugzilla.BugDict],
        regressor_bug_ids: Set[int],
        bugs: List[int],
        meta_bugs: Dict[int, List[int]],
    ) -> None:
        # A map from bug ID to the list of commits associated with the bug (in order of landing).
        bug_to_commits = collections.defaultdict(list)

        for commit in repository.get_commits():
            bug_id = commit["bug_id"]
            if not bug_id:
                continue

            if bug_id in bug_map or bug_id in regressor_bug_ids:
                bug_to_commits[bug_id].append(commit)

        # All bugs blocking the "fuzz" bug (316898) and its dependent meta bugs are fuzzing bugs.
        fuzzblocker_bugs = {
            bug["id"] for bug in bug_map.values() if is_fuzzblocker(bug)
        }
        fuzzing_bugs = (
            set(
                sum(self.get_blocking_of([316898], meta_only=True).values(), [])
                + [
                    bug["id"]
                    for bug in bug_map.values()
                    if "bugmon" in bug["whiteboard"].lower()
                    or "bugmon" in bug["keywords"]
                ]
            )
            | fuzzblocker_bugs
        )

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(
                None,
                (repository.get_revision_id(commit) for bug_id in bugs
                 for commit in bug_to_commits[bug_id]),
            ))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        blocker_to_meta = collections.defaultdict(set)
        for meta_bug, blocker_bug_ids in meta_bugs.items():
            for blocker_bug_id in blocker_bug_ids:
                blocker_to_meta[blocker_bug_id].add(meta_bug)

        def find_risk_band(risk: float) -> str:
            for name, start, end in self.risk_bands:
                if start <= risk <= end:
                    return name

            assert False
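
        # Note: self.risk_bands is assumed to be an iterable of (name, start, end)
        # triples whose [start, end] intervals cover [0.0, 1.0]; a risk value
        # falling outside every band would trip the assert above.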

        def get_commit_data(commit_list: List[repository.CommitDict]) -> List[dict]:
            if len(commit_list) == 0:
                return []

            # Evaluate the risk of the commits associated with this bug.
            probs = self.regressor_model.classify(commit_list, probabilities=True)

            commits_data = []
            for i, commit in enumerate(commit_list):
                revision_id = repository.get_revision_id(commit)
                if revision_id in revision_map:
                    revision = revision_map[revision_id]

                    testing = phabricator.get_testing_project(revision)
                    if testing is None:
                        testing = "missing"

                    first_review_time = phabricator.get_review_time(revision)
                else:
                    testing = None
                    first_review_time = None

                commits_data.append({
                    "id": commit["node"],
                    "testing": testing,
                    "first_review_time": first_review_time.total_seconds() / 86400
                    if first_review_time else None,
                    "risk": float(probs[i][1]),
                    "backedout": bool(commit["backedoutby"]),
                    "author": commit["author_email"],
                    "reviewers": commit["reviewers"],
                    "coverage": [
                        commit["cov_added"],
                        commit["cov_covered"],
                        commit["cov_unknown"],
                    ],
                })

            return commits_data

        component_team_mapping = bugzilla.get_component_team_mapping()

        bug_summaries = []
        for bug_id in bugs:
            if bug_id not in bug_map:
                continue

            commit_list = bug_to_commits.get(bug_id, [])
            commit_data = get_commit_data(commit_list)

            bug = bug_map[bug_id]

            time_to_bug = None
            for regressor_bug_id in bug["regressed_by"]:
                # Get the date of the last commit in the regressor bug that landed before the regression bug.
                last_commit_date = max(
                    (dateutil.parser.parse(commit["pushdate"])
                     for commit in bug_to_commits.get(regressor_bug_id, [])
                     if dateutil.parser.parse(commit["pushdate"])
                     < dateutil.parser.parse(bug["creation_time"]).replace(tzinfo=None)),
                    default=None,
                )

                if last_commit_date is None:
                    continue

                # Get the minimum "time to bug" (from the fix time of the closest regressor to the regression bug).
                cur_time_to_bug = (
                    dateutil.parser.parse(bug["creation_time"]).replace(tzinfo=None)
                    - last_commit_date
                ).total_seconds() / 86400
                if time_to_bug is None or cur_time_to_bug < time_to_bug:
                    time_to_bug = cur_time_to_bug

            time_to_confirm = None
            if bug["is_confirmed"]:
                for history in bug["history"]:
                    for change in history["changes"]:
                        if (change["field_name"] == "status"
                                and change["removed"] == "UNCONFIRMED"
                                and change["added"] in ("NEW", "ASSIGNED")):
                            time_to_confirm = (
                                dateutil.parser.parse(history["when"]).replace(tzinfo=None)
                                - dateutil.parser.parse(bug["creation_time"]).replace(tzinfo=None)
                            ).total_seconds() / 86400
                            break

                    if time_to_confirm is not None:
                        break

            bug_summary = {
                "id": bug_id,
                "regressor": bug_id in regressor_bug_ids,
                "regression": len(bug["regressed_by"]) > 0
                or any(keyword in bug["keywords"]
                       for keyword in ["regression", "talos-regression"])
                or ("cf_has_regression_range" in bug
                    and bug["cf_has_regression_range"] == "yes"),
                "time_to_bug": time_to_bug,
                "time_to_confirm": time_to_confirm,
                "whiteboard": bug["whiteboard"],
                "assignee": bug["assigned_to"]
                if bug["assigned_to"] != "*****@*****.**" else None,
                "versions": bugzilla.get_fixed_versions(bug),
                "component": get_full_component(bug),
                "team": component_team_mapping.get(bug["product"], {}).get(
                    bug["component"]),
                "summary": bug["summary"],
                "fixed": bug["resolution"] == "FIXED",
                "types": bug_to_types(bug, bug_map),
                "severity": bug["severity"],
                "creation_date": dateutil.parser.parse(
                    bug["creation_time"]).strftime("%Y-%m-%d"),
                "date": max(
                    dateutil.parser.parse(commit["pushdate"])
                    for commit in commit_list).strftime("%Y-%m-%d")
                if len(commit_list) > 0 else None,
                "commits": commit_data,
                "meta_ids": list(blocker_to_meta[bug_id]),
                "risk_band": find_risk_band(
                    max(commit["risk"] for commit in commit_data))
                if len(commit_data) > 0 else None,
                "fuzz": "b" if bug["id"] in fuzzblocker_bugs
                else "y" if bug["id"] in fuzzing_bugs else "n",
            }

            self.get_prev_bugs_stats(bug_summary, commit_list)

            bug_summaries.append(bug_summary)

        landings_by_date = collections.defaultdict(list)
        for bug_summary in bug_summaries:
            landings_by_date[bug_summary["creation_date"]].append(bug_summary)

        with open("landings_by_date.json", "w") as f:
            output: dict = {
                "summaries": landings_by_date,
            }
            if meta_bugs is not None:
                output["featureMetaBugs"] = [{
                    "id":
                    meta_bug,
                    "summary":
                    bug_map[meta_bug]["summary"]
                } for meta_bug in meta_bugs]

            json.dump(output, f)
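
For reference, a sketch of the resulting landings_by_date.json layout, inferred from the code above (keys abridged, values illustrative):

        {
            "summaries": {
                "2021-05-17": [
                    {"id": 1234567, "risk_band": "...", "commits": [...], "fuzz": "n"}
                ]
            },
            "featureMetaBugs": [{"id": 1111111, "summary": "..."}]
        }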
Code example #5
    def go(self,
           bugs: List[int],
           meta_bugs: Optional[List[int]] = None) -> None:
        if meta_bugs is not None:
            bugs += meta_bugs + self.get_blocking_of(meta_bugs)

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(bugs)

        component_team_mapping = bugzilla.get_component_team_mapping()

        bugs_set = set(bugs)

        commits = [
            commit for commit in repository.get_commits()
            if commit["bug_id"] in bugs_set
        ]
        commit_map = {commit["node"]: commit for commit in commits}
        hash_to_rev = {commit["node"]: i for i, commit in enumerate(commits)}

        logger.info(f"{len(commits)} commits to analyze.")

        logger.info(f"{len(bugs_set)} bugs to analyze.")

        bug_map = {}
        regressor_bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_map[bug["id"]] = bug

            if len(bug["regressions"]) > 0:
                regressor_bug_ids.add(bug["id"])

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        if meta_bugs is not None:
            blocker_to_meta = collections.defaultdict(set)
            for meta_bug in meta_bugs:
                if meta_bug not in bug_map:
                    continue

                for blocker_bug_id in bugzilla.find_blocking(
                        bug_map, bug_map[meta_bug]):
                    blocker_to_meta[blocker_bug_id].add(meta_bug)

        def _download_past_bugs(url: str) -> dict:
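            # The URL is assumed to end in ".zst"; the basename minus those four
            # characters is the path of the decompressed JSON file.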
            path = os.path.join("data", os.path.basename(url)[:-4])
            download_check_etag(url, path=f"{path}.zst")
            zstd_decompress(path)
            assert os.path.exists(path)
            with open(path, "r") as f:
                return json.load(f)

        past_regressions_by = {}
        past_fixed_bugs_by = {}
        past_regression_blocked_bugs_by = {}
        past_fixed_bug_blocked_bugs_by = {}

        for dimension in ["component", "directory", "file", "function"]:
            past_regressions_by[dimension] = _download_past_bugs(
                PAST_REGRESSIONS_BY_URL.format(dimension=dimension))
            past_fixed_bugs_by[dimension] = _download_past_bugs(
                PAST_FIXED_BUGS_BY_URL.format(dimension=dimension))
            past_regression_blocked_bugs_by[dimension] = _download_past_bugs(
                PAST_REGRESSION_BLOCKED_BUGS_BY_URL.format(
                    dimension=dimension))
            past_fixed_bug_blocked_bugs_by[dimension] = _download_past_bugs(
                PAST_FIXED_BUG_BLOCKED_BUGS_BY_URL.format(dimension=dimension))

        path_to_component = repository.get_component_mapping()

        def get_full_component(bug):
            return "{}::{}".format(bug["product"], bug["component"])

        def histogram(components: List[str]) -> Dict[str, float]:
            counter = collections.Counter(components)
            return {
                component: count / len(components)
                for component, count in counter.most_common()
            }
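
        # e.g. histogram(["Core::DOM", "Core::DOM", "Core::Graphics"]) returns
        # approximately {"Core::DOM": 0.67, "Core::Graphics": 0.33}.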

        def component_histogram(bugs: List[dict]) -> Dict[str, float]:
            return histogram([bug["component"] for bug in bugs])

        def find_risk_band(risk: float) -> str:
            for name, start, end in self.risk_bands:
                if start <= risk <= end:
                    return name

            assert False

        def get_prev_bugs(past_bugs_by: dict,
                          commit: repository.CommitDict,
                          component: Optional[str] = None) -> List[dict]:
            paths = [
                path
                for path in commit["files"]
                if component is None
                or (path.encode("utf-8") in path_to_component
                    and path_to_component[path.encode("utf-8")]
                    == component.encode("utf-8"))
            ]

            past_bugs = []

            for path, f_group in commit["functions"].items():
                if path not in paths:
                    continue

                if path not in past_bugs_by["function"]:
                    continue

                found = False
                for f in f_group:
                    if f[0] not in past_bugs_by["function"][path]:
                        continue

                    found = True
                    past_bugs += past_bugs_by["function"][path][f[0]]

                if found:
                    paths.remove(path)

            # Iterate over a copy, since matched paths are removed from the list.
            for path in paths[:]:
                if path in past_bugs_by["file"]:
                    past_bugs += past_bugs_by["file"][path]
                    paths.remove(path)

            # Materialize the pairs first, since paths is mutated inside the loop.
            for path, directories in list(
                    zip(paths, repository.get_directories(paths))):
                found = False
                for directory in directories:
                    if directory in past_bugs_by["directory"]:
                        found = True
                        past_bugs += past_bugs_by["directory"][directory]

                if found:
                    paths.remove(path)

            components = [
                path_to_component[path.encode("utf-8")].tobytes().decode("utf-8")
                for path in paths
                if path.encode("utf-8") in path_to_component
            ]

            for component in components:
                if component in past_bugs_by["component"]:
                    past_bugs += past_bugs_by["component"][component]

            return past_bugs

        def get_prev_bugs_stats(
            commit_group: dict,
            commit_list: List[repository.CommitDict],
            component: Optional[str] = None,
        ) -> None:
            # Find previous regressions that occurred in the same files as those touched by these commits,
            # previous bugs that were fixed by touching the same files as these commits,
            # previous bugs that were blocked by regressions that occurred in the same files as those touched by these commits,
            # and previous bugs that were blocked by bugs that were fixed by touching the same files as those touched by these commits.
            prev_regressions: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_regressions_by, commit, component)
                 for commit in commit_list),
                [],
            )
            prev_fixed_bugs: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_fixed_bugs_by, commit, component)
                 for commit in commit_list),
                [],
            )
            prev_regression_blocked_bugs: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_regression_blocked_bugs_by, commit,
                               component) for commit in commit_list),
                [],
            )
            prev_fixed_bug_blocked_bugs: List[Dict[str, Any]] = sum(
                (get_prev_bugs(past_fixed_bug_blocked_bugs_by, commit,
                               component) for commit in commit_list),
                [],
            )

            prev_regressions = _deduplicate(prev_regressions)
            prev_fixed_bugs = _deduplicate(prev_fixed_bugs)
            prev_regression_blocked_bugs = _deduplicate(
                prev_regression_blocked_bugs)
            prev_fixed_bug_blocked_bugs = _deduplicate(
                prev_fixed_bug_blocked_bugs)

            regression_components = component_histogram(prev_regressions)
            fixed_bugs_components = component_histogram(prev_fixed_bugs)
            regression_blocked_bug_components = component_histogram(
                prev_regression_blocked_bugs)
            fixed_bug_blocked_bug_components = component_histogram(
                prev_fixed_bug_blocked_bugs)

            commit_group["most_common_regression_components"] = regression_components
            # These are only used for component connections for the time being.
            if component:
                commit_group["prev_regressions"] = prev_regressions[-3:]
                commit_group["prev_fixed_bugs"] = prev_fixed_bugs[-3:]
                commit_group["prev_regression_blocked_bugs"] = prev_regression_blocked_bugs[-3:]
                commit_group["prev_fixed_bug_blocked_bugs"] = prev_fixed_bug_blocked_bugs[-3:]
                commit_group["most_common_fixed_bugs_components"] = fixed_bugs_components
                commit_group["most_common_regression_blocked_bug_components"] = regression_blocked_bug_components
                commit_group["most_common_fixed_bug_blocked_bug_components"] = fixed_bug_blocked_bug_components

        def get_commit_data(commit_list: List[repository.CommitDict]) -> List[dict]:
            if len(commit_list) == 0:
                return []

            # Evaluate the risk of the commits associated with this bug.
            probs = self.regressor_model.classify(commit_list, probabilities=True)

            commits_data = []
            for i, commit in enumerate(commit_list):
                revision_id = repository.get_revision_id(commit)
                if revision_id in revision_map:
                    testing = phabricator.get_testing_project(
                        revision_map[revision_id])

                    if testing is None:
                        testing = "missing"
                else:
                    testing = None

                commits_data.append({
                    "id": commit["node"],
                    "testing": testing,
                    "risk": float(probs[i][1]),
                    "backedout": bool(commit["backedoutby"]),
                    "author": commit["author_email"],
                    "reviewers": commit["reviewers"],
                    "coverage": [
                        commit["cov_added"],
                        commit["cov_covered"],
                        commit["cov_unknown"],
                    ],
                })

            return commits_data

        # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
        commits.sort(key=lambda x: x["bug_id"])

        bug_to_commits = {}
        for bug_id, commit_iter in itertools.groupby(commits,
                                                     lambda x: x["bug_id"]):
            # TODO: Figure out what to do with bugs we couldn't download (security bugs).
            if bug_id not in bug_map:
                continue

            bug_to_commits[bug_id] = sorted(
                commit_iter, key=lambda x: hash_to_rev[x["node"]])

        bug_summaries = []
        for bug_id in bugs:
            if bug_id not in bug_map:
                continue

            commit_list = bug_to_commits.get(bug_id, [])
            commit_data = get_commit_data(commit_list)

            bug = bug_map[bug_id]

            bug_summary = {
                "id": bug_id,
                "regressor": bug_id in regressor_bug_ids,
                "regression": len(bug["regressed_by"]) > 0
                or any(keyword in bug["keywords"]
                       for keyword in ["regression", "talos-regression"])
                or ("cf_has_regression_range" in bug
                    and bug["cf_has_regression_range"] == "yes"),
                "whiteboard": bug["whiteboard"],
                "assignee": bug["assigned_to"]
                if bug["assigned_to"] != "*****@*****.**" else None,
                "versions": bugzilla.get_fixed_versions(bug),
                "component": get_full_component(bug),
                "team": bugzilla.component_to_team(
                    component_team_mapping, bug["product"], bug["component"]),
                "summary": bug["summary"],
                "types": bug_to_types(bug),
                "severity": bug["severity"],
                "creation_date": dateutil.parser.parse(
                    bug["creation_time"]).strftime("%Y-%m-%d"),
                "date": max(
                    dateutil.parser.parse(commit["pushdate"])
                    for commit in commit_list).strftime("%Y-%m-%d")
                if len(commit_list) > 0 else None,
                "commits": commit_data,
                "meta_ids": list(blocker_to_meta[bug_id]),
                "risk_band": find_risk_band(
                    max(commit["risk"] for commit in commit_data))
                if len(commit_data) > 0 else None,
            }

            get_prev_bugs_stats(bug_summary, commit_list)

            bug_summaries.append(bug_summary)

        landings_by_date = collections.defaultdict(list)
        for bug_summary in bug_summaries:
            landings_by_date[bug_summary["creation_date"]].append(bug_summary)

        with open("landings_by_date.json", "w") as f:
            output: dict = {
                "summaries": landings_by_date,
            }
            if meta_bugs is not None:
                output["featureMetaBugs"] = [{
                    "id":
                    meta_bug,
                    "summary":
                    bug_map[meta_bug]["summary"]
                } for meta_bug in meta_bugs]

            json.dump(output, f)

        # Retrieve components of test failures that occurred when landing patches to fix bugs in specific components.
        component_failures = collections.defaultdict(list)

        push_data_iter, push_data_count, all_runnables = test_scheduling.get_push_data(
            "group")

        for revisions, _, _, possible_regressions, likely_regressions in tqdm(
                push_data_iter(), total=push_data_count):
            commit_list = [
                commit_map[revision] for revision in revisions
                if revision in commit_map
            ]
            if len(commit_list) == 0:
                continue

            commit_bugs = [
                bug_map[commit["bug_id"]] for commit in commit_list
                if commit["bug_id"] in bug_map
            ]

            components = list(
                set(get_full_component(bug) for bug in commit_bugs))

            groups = [
                group for group in list(
                    set(possible_regressions + likely_regressions))
                if group.encode("utf-8") in path_to_component
            ]

            for group in groups:
                for component in components:
                    component_failures[component].append(
                        path_to_component[group.encode("utf-8")].tobytes().decode("utf-8"))

        # Filter out commits for which we have no bugs.
        commits = [commit for commit in commits if commit["bug_id"] in bug_map]

        # Sort commits by bug component, so we can use itertools.groupby to group them by bug component.
        commits.sort(key=lambda x: get_full_component(bug_map[x["bug_id"]]))

        commit_groups = []
        for component, commit_iter in itertools.groupby(
                commits, lambda x: get_full_component(bug_map[x["bug_id"]])):
            commit_group = {
                "component": component,
                "most_common_test_failure_components": histogram(
                    component_failures[component])
                if component in component_failures else {},
            }
            get_prev_bugs_stats(commit_group, list(commit_iter), component)
            commit_groups.append(commit_group)

        with open("component_connections.json", "w") as f:
            json.dump(commit_groups, f)

        repository.close_component_mapping()
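
_deduplicate is used above but not defined in this snippet. A plausible sketch, assuming the past-bug entries are dicts with an "id" key and the most recent occurrence of each bug should win:

        def _deduplicate(bug_summaries: List[dict]) -> List[dict]:
            seen = set()
            results = []
            # Walk the list backwards so the last occurrence of each ID is kept.
            for bug_summary in bug_summaries[::-1]:
                if bug_summary["id"] in seen:
                    continue

                seen.add(bug_summary["id"])
                results.append(bug_summary)

            results.reverse()
            return results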
Code example #6
    def go(self,
           bugs: List[int],
           meta_bugs: Optional[List[int]] = None) -> None:
        if meta_bugs is not None:
            bugs += meta_bugs + self.get_blocking_of(meta_bugs)

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(bugs)

        bugs_set = set(bugs)

        commits = [
            commit for commit in repository.get_commits()
            if commit["bug_id"] in bugs_set
        ]
        hash_to_rev = {commit["node"]: i for i, commit in enumerate(commits)}

        logger.info(f"{len(commits)} commits to analyze.")

        bug_ids = {commit["bug_id"] for commit in commits}

        logger.info(f"{len(bug_ids)} bugs to analyze.")

        bug_map = {}
        regressor_bug_ids = set()
        for bug in bugzilla.get_bugs():
            if bug["id"] in bugs_set:
                bug_map[bug["id"]] = bug

            if len(bug["regressions"]) > 0:
                regressor_bug_ids.add(bug["id"])

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        if meta_bugs is not None:
            blocker_to_meta = collections.defaultdict(set)
            for meta_bug in meta_bugs:
                if meta_bug not in bug_map:
                    continue

                for blocker_bug_id in bugzilla.find_blocking(
                        bug_map, bug_map[meta_bug]):
                    blocker_to_meta[blocker_bug_id].add(meta_bug)

        # TODO: Use past regressions by function information too (maybe first by function and if no results by file? or prioritize function and recentness?)

        def _download_past_bugs(url: str) -> dict:
            path = os.path.join("data", os.path.basename(url)[:-4])
            download_check_etag(url, path=f"{path}.zst")
            zstd_decompress(path)
            assert os.path.exists(path)
            with open(path, "r") as f:
                return json.load(f)

        past_regressions_by_file = _download_past_bugs(
            PAST_REGRESSIONS_BY_FILE_URL)
        past_fixed_bugs_by_file = _download_past_bugs(
            PAST_FIXED_BUGS_BY_FILE_URL)
        past_regression_blocked_bugs_by_file = _download_past_bugs(
            PAST_REGRESSION_BLOCKED_BUGS_BY_FILE_URL)
        past_fixed_bug_blocked_bugs_by_file = _download_past_bugs(
            PAST_FIXED_BUG_BLOCKED_BUGS_BY_FILE_URL)

        def component_histogram(bugs: List[dict]) -> Dict[str, float]:
            counter = collections.Counter(bug["component"] for bug in bugs)
            return {
                component: count / len(bugs)
                for component, count in counter.most_common()
            }

        # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
        commits.sort(key=lambda x: x["bug_id"])

        commit_groups = []
        for bug_id, commit_iter in itertools.groupby(commits,
                                                     lambda x: x["bug_id"]):
            # TODO: Figure out what to do with bugs we couldn't download (security bugs).
            if bug_id not in bug_map:
                continue

            commit_list = list(commit_iter)
            commit_list.sort(key=lambda x: hash_to_rev[x["node"]])

            # Find previous regressions that occurred in the same files as those touched by these commits,
            # previous bugs that were fixed by touching the same files as these commits,
            # previous bugs that were blocked by regressions that occurred in the same files as those touched by these commits,
            # and previous bugs that were blocked by bugs that were fixed by touching the same files as those touched by these commits.
            prev_regressions: List[Dict[str, Any]] = []
            prev_fixed_bugs: List[Dict[str, Any]] = []
            prev_regression_blocked_bugs: List[Dict[str, Any]] = []
            prev_fixed_bug_blocked_bugs: List[Dict[str, Any]] = []
            for commit in commit_list:
                for path in commit["files"]:
                    if path in past_regressions_by_file:
                        prev_regressions.extend(
                            bug_summary
                            for bug_summary in past_regressions_by_file[path])

                    if path in past_fixed_bugs_by_file:
                        prev_fixed_bugs.extend(
                            bug_summary
                            for bug_summary in past_fixed_bugs_by_file[path])

                    if path in past_regression_blocked_bugs_by_file:
                        prev_regression_blocked_bugs.extend(
                            bug_summary for bug_summary in
                            past_regression_blocked_bugs_by_file[path])

                    if path in past_fixed_bug_blocked_bugs_by_file:
                        prev_fixed_bug_blocked_bugs.extend(
                            bug_summary for bug_summary in
                            past_fixed_bug_blocked_bugs_by_file[path])

            prev_regressions = _deduplicate(prev_regressions)
            prev_fixed_bugs = _deduplicate(prev_fixed_bugs)
            prev_regression_blocked_bugs = _deduplicate(
                prev_regression_blocked_bugs)
            prev_fixed_bug_blocked_bugs = _deduplicate(
                prev_fixed_bug_blocked_bugs)

            regression_components = component_histogram(prev_regressions)
            fixed_bugs_components = component_histogram(prev_fixed_bugs)
            regression_blocked_bug_components = component_histogram(
                prev_regression_blocked_bugs)
            fixed_bug_blocked_bug_components = component_histogram(
                prev_fixed_bug_blocked_bugs)

            # Evaluate the risk of the commits associated with this bug.
            probs = self.regressor_model.classify(commit_list,
                                                  probabilities=True)

            commits_data = []
            for i, commit in enumerate(commit_list):
                revision_id = repository.get_revision_id(commit)
                if revision_id in revision_map:
                    testing = phabricator.get_testing_project(
                        revision_map[revision_id])

                    if testing is None:
                        testing = "none"
                else:
                    testing = None

                commits_data.append({
                    "id": commit["node"],
                    "testing": testing,
                    "risk": float(probs[i][1]),
                    "backedout": bool(commit["backedoutby"]),
                    "regressor": commit["bug_id"] in regressor_bug_ids,
                })

            bug = bug_map[bug_id]

            commit_groups.append({
                "id": bug_id,
                "versions": bugzilla.get_fixed_versions(bug),
                "component": "{}::{}".format(bug["product"], bug["component"]),
                "summary": bug["summary"],
                "date": max(
                    dateutil.parser.parse(commit["pushdate"])
                    for commit in commit_list).strftime("%Y-%m-%d"),
                "commits": commits_data,
                "meta_ids": list(blocker_to_meta[bug_id]),
                "prev_regressions": prev_regressions[-3:],
                "prev_fixed_bugs": prev_fixed_bugs[-3:],
                "prev_regression_blocked_bugs": prev_regression_blocked_bugs[-3:],
                "prev_fixed_bug_blocked_bugs": prev_fixed_bug_blocked_bugs[-3:],
                "most_common_regression_components": regression_components,
                "most_common_fixed_bugs_components": fixed_bugs_components,
                "most_common_regression_blocked_bug_components": regression_blocked_bug_components,
                "most_common_fixed_bug_blocked_bug_components": fixed_bug_blocked_bug_components,
            })

        landings_by_date = collections.defaultdict(list)
        for commit_group in commit_groups:
            landings_by_date[commit_group["date"]].append(commit_group)

        with open("landings_by_date.json", "w") as f:
            output: dict = {
                "landings": landings_by_date,
            }
            if meta_bugs is not None:
                output["featureMetaBugs"] = [{
                    "id":
                    meta_bug,
                    "summary":
                    bug_map[meta_bug]["summary"]
                } for meta_bug in meta_bugs]

            json.dump(output, f)
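
A hypothetical invocation (the class name and bug IDs are illustrative, not from this snippet). Meta bugs passed to go are expanded into their blocking bugs via get_blocking_of before download:

        generator = LandingsRiskReportGenerator()
        generator.go([1234567, 1234568], meta_bugs=[1111111])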
Code example #7
    def go(self, days_start: int, days_end: int) -> None:
        commits = self.get_landed_since(days_start, days_end)

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(commit["bug_id"] for commit in commits
                               if commit["bug_id"])

        # Filter out commits with no Phabricator revision linked to them.
        commits = [
            commit for commit in commits
            if repository.get_revision_id(commit) in revision_map
        ]
        logger.info(f"{len(commits)} revisions")

        # Filter out commits with no testing tags.
        commits = [
            commit for commit in commits if phabricator.get_testing_project(
                revision_map[repository.get_revision_id(commit)]) is not None
        ]
        logger.info(f"{len(commits)} revisions with testing tags")

        def list_testing_projects(
            commits: Iterable[repository.CommitDict],
        ) -> Collection[str]:
            return list(
                filter(
                    None,
                    (phabricator.get_testing_project(
                        revision_map[repository.get_revision_id(commit)])
                     for commit in commits),
                ))

        testing_projects = list_testing_projects(commits)

        print(f"Most common testing tags (in {len(commits)} revisions):")
        for testing_project, count in collections.Counter(
                testing_projects).most_common():
            print(
                f"{testing_project} - {round(100 * count / len(testing_projects), 1)}%"
            )

        backedout_commits = [
            commit for commit in commits if commit["backedoutby"]
        ]
        backedout_testing_projects = list_testing_projects(backedout_commits)

        print(
            f"\nMost common testing tags for backed-out revisions (in {len(backedout_commits)} revisions):"
        )
        for testing_project, count in collections.Counter(
                backedout_testing_projects).most_common():
            print(
                f"{testing_project} - {round(100 * count / len(backedout_testing_projects), 1)}%"
            )

        regressor_bug_ids = {
            bug["id"]
            for bug in bugzilla.get_bugs() if len(bug["regressions"]) > 0
        }

        regressor_commits = [
            commit for commit in commits
            if commit["bug_id"] in regressor_bug_ids
        ]
        regressor_testing_projects = list_testing_projects(regressor_commits)

        print(
            f"\nMost common testing tags for revisions which caused regressions (in {len(regressor_commits)} revisions):"
        )
        for testing_project, count in collections.Counter(
                regressor_testing_projects).most_common():
            print(
                f"{testing_project} - {round(100 * count / len(regressor_testing_projects), 1)}%"
            )