Ejemplo n.º 1
0
    def get_commits_to_ignore(self) -> None:
        """Compute the list of commits to ignore for training and store it.

        A commit is ignored when it is marked as ignored, was backed out, has
        no bug ID, is itself a backout, is a wptsync commit, or was manually
        annotated as "ignore". Commits that a backout backs out are ignored
        too. The result is written to IGNORED_COMMITS_DB, compressed and
        uploaded.
        """
        assert db.download(repository.COMMITS_DB)

        ignored = set()
        commits_to_ignore = []
        all_commits = set()

        # Nodes a human annotator explicitly labeled as "ignore".
        annotate_ignore_nodes = {
            node
            for node, label in labels.get_labels("annotateignore")
            if label == "1"
        }

        for commit in repository.get_commits(include_no_bug=True,
                                             include_backouts=True,
                                             include_ignored=True):
            # Track every known short hash so we can filter out backed-out
            # revisions that don't exist in our repository mirror below.
            all_commits.add(commit["node"][:12])

            if (commit["ignored"] or commit["backedoutby"]
                    or not commit["bug_id"] or len(commit["backsout"]) > 0
                    or repository.is_wptsync(commit)
                    or commit["node"] in annotate_ignore_nodes):
                commits_to_ignore.append({
                    "rev": commit["node"],
                    "type": "backedout" if commit["backedoutby"] else "",
                })
                ignored.add(commit["node"][:12])

            # Also ignore every revision this commit backs out (iterating an
            # empty "backsout" list is a no-op, so no length guard is needed).
            for backedout in commit["backsout"]:
                if backedout[:12] in ignored:
                    continue
                ignored.add(backedout[:12])

                commits_to_ignore.append({
                    "rev": backedout,
                    "type": "backedout",
                })

        # Fix: this message used to be identical to the post-filtering one
        # below, making the two logged counts indistinguishable.
        logger.info(
            f"{len(commits_to_ignore)} commits to ignore (before filtering)...")

        # Skip backed-out commits which aren't in the repository (commits which landed *before* the Mercurial history
        # started, and backouts which mentioned a bad hash in their message).
        commits_to_ignore = [
            c for c in commits_to_ignore if c["rev"][:12] in all_commits
        ]

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        logger.info("...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore
                if commit["type"] == "backedout")))

        db.write(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)
        db.upload(IGNORED_COMMITS_DB)
Ejemplo n.º 2
0
    def get_labels(self):
        """Build regressor labels for commits.

        Returns a tuple ``(classes, [0, 1])`` where ``classes`` maps a commit
        node to 1 if it caused a regression (found either by the regressor
        finder or via Bugzilla's "regressed_by" field) and to 0 otherwise.
        Commits that were backed out, are wptsync, fall in the evaluation
        window, predate the label history, or are too recent to be judged are
        skipped.
        """
        classes = {}

        # Revisions flagged by the regressor finder as bug-introducing.
        regressors = set(
            r["bug_introducing_rev"]
            for r in db.read(BUG_INTRODUCING_COMMITS_DB)
            if r["bug_introducing_rev"]
        )

        # Bugs referenced by other bugs' "regressed_by" field.
        regressor_bugs = set(
            sum((bug["regressed_by"] for bug in bugzilla.get_bugs()), [])
        )

        # Hoisted out of the loop: a single reference time keeps the cutoffs
        # consistent for every commit (datetime.utcnow() would otherwise drift
        # slightly over a long iteration) and avoids recomputing the
        # relativedelta arithmetic per commit.
        now = datetime.utcnow()
        evaluation_cutoff = now - relativedelta(months=EVALUATION_MONTHS)
        oldest_label_cutoff = now - relativedelta(years=2, months=6)
        recent_cutoff = now - relativedelta(months=6)

        for commit_data in repository.get_commits():
            if commit_data["backedoutby"]:
                continue

            if repository.is_wptsync(commit_data):
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])

            # Skip commits used for the evaluation phase.
            if push_date > evaluation_cutoff:
                continue

            node = commit_data["node"]
            if node in regressors or commit_data["bug_id"] in regressor_bugs:
                classes[node] = 1
            else:
                # The labels we have are only from two years and six months ago (see the regressor finder script).
                if push_date < oldest_label_cutoff:
                    continue

                # We remove the last 6 months, as there could be regressions which haven't been filed yet.
                if push_date > recent_cutoff:
                    continue

                classes[node] = 0

        print(
            "{} commits caused regressions".format(
                sum(1 for label in classes.values() if label == 1)
            )
        )

        print(
            "{} commits did not cause regressions".format(
                sum(1 for label in classes.values() if label == 0)
            )
        )

        return classes, [0, 1]
        def generate_all_data() -> Generator[Dict[str, Any], None, None]:
            """Yield per-push training data dicts ({"revs": ..., "data": ...}).

            Closure: reads `granularity`, `all_runnables`, `push_data_iter` and
            `push_data_count` from the enclosing scope. Mutates the
            `past_failures` shelve DB as a side effect (updates counters and
            stores "all_runnables"/"push_num").
            """
            past_failures = test_scheduling.get_past_failures(
                granularity, False)

            # Resume the push counter from a previous run, if one was stored.
            push_num = past_failures[
                "push_num"] if "push_num" in past_failures else 0

            # Map node -> commit data for quick lookup of the commits of each push.
            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                commit_map[commit_data["node"]] = commit_data

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            # Counters for the summary logged at the end.
            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            # The touched-together generator is driven via send() below, once
            # per push (group-level granularities only).
            if granularity in ("group", "config_group"):
                update_touched_together_gen = test_scheduling.update_touched_together(
                )
                next(update_touched_together_gen)

            for (
                    i,
                (
                    revisions,
                    fix_revision,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
                ),
            ) in enumerate(tqdm(push_data_iter(), total=push_data_count)):
                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                # pop() so each commit is consumed at most once and the map shrinks as we go.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                # Skip wptsync commits, since they are not like normal pushes made by developers.
                if any(repository.is_wptsync(commit) for commit in commits):
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We consider only the runnables which run in this push, and the possible and likely regressions
                # from this push. We can't consider all runnables because we can't be sure that a task that didn't
                # run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions +
                        likely_regressions))

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity in ("group", "config_group"):
                    update_touched_together_gen.send(commits[0]["node"])

                # Always iterate generate_data (it updates past_failures as a
                # side effect), but only keep results in the history window.
                result_data = []
                for data in test_scheduling.generate_data(
                        granularity,
                        past_failures,
                        merged_commits,
                        push_num,
                        runnables_to_consider,
                        possible_regressions,
                        likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result_data.append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield {
                        "revs": revisions,
                        "data": result_data,
                    }

            # Flush any pending work in the touched-together generator.
            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(
                f"skipped {skipped_no_runnables} (no interesting runnables)")

            # Persist the counter so the next run can resume from it.
            past_failures["push_num"] = push_num
            past_failures.close()
Ejemplo n.º 4
0
    def overwrite_classes(self, commits, classes, probabilities):
        """Force wptsync commits to be classified as not risky.

        For every commit that is a wptsync commit, its entry in ``classes`` is
        replaced: label 0 when ``probabilities`` is falsy, otherwise the
        probability pair [1.0, 0.0]. ``classes`` is mutated in place and also
        returned.
        """
        for index, commit_data in enumerate(commits):
            if not repository.is_wptsync(commit_data):
                continue
            classes[index] = [1.0, 0.0] if probabilities else 0

        return classes
Ejemplo n.º 5
0
    def evaluation(self) -> None:
        """Evaluate the model on recent commits and derive three risk bands.

        Collects the last EVALUATION_MONTHS of commits (excluded from
        training), scores each bug by the riskiest of its commits, then
        empirically finds probability thresholds for a low-risk band (half the
        average regression rate), a high-risk band (double the average) and an
        average band in between. Results are printed.
        """
        # Bugs referenced by other bugs' "regressed_by" field.
        bug_regressors = set(
            sum((bug["regressed_by"] for bug in bugzilla.get_bugs()), [])
        )

        commits = []

        # Hoisted out of the loop so the cutoff is identical for every commit.
        evaluation_cutoff = datetime.utcnow() - relativedelta(
            months=EVALUATION_MONTHS
        )

        for commit_data in repository.get_commits():
            if commit_data["backedoutby"]:
                continue

            if repository.is_wptsync(commit_data):
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])

            # Use the past two months of data (make sure it is not also used for training!).
            if push_date < evaluation_cutoff:
                continue

            commits.append(commit_data)

        print(f"{len(commits)} commits in the evaluation set")
        bugs_num = len(set(commit["bug_id"] for commit in commits))
        print(f"{bugs_num} bugs in the evaluation set")

        # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
        commits.sort(key=lambda x: x["bug_id"])

        results = []
        for bug_id, commit_iter in itertools.groupby(commits, lambda x: x["bug_id"]):
            # A bug is as risky as its riskiest commit.
            probs = self.classify(list(commit_iter), probabilities=True)
            results.append((max(probs[:, 1]), bug_id in bug_regressors))

        # Let's define the risk bands relatively to average risk.
        # On average, around 1 out of 10 (8%) patches cause regressions.
        # Risk band 1 - around 1 out of 20 (4%) patches within this risk band cause regressions.
        # Risk band 2 - around 1 out of 10 (8%) patches within this risk band cause regressions.
        # Risk band 3 - around 1 out of 5  (16%) patches within this risk band cause regressions.

        # Step 1. Calculate % of patches which cause regressions.
        total_landings = len(results)
        total_regressions = sum(1 for _, is_reg in results if is_reg)
        average_regression_rate = total_regressions / total_landings

        print(f"Average risk is {average_regression_rate}")

        MIN_SAMPLE = 200

        # Step 2. Define risk band 1 (half than average risk).
        max_band1_prob = 1.0
        total_landings = 0
        total_regressions = 0
        results.sort(key=lambda x: x[0])
        for prob, is_reg in results:
            total_landings += 1
            if is_reg:
                total_regressions += 1

            if total_landings < MIN_SAMPLE:
                continue

            # Fix: the printed message was missing its closing parenthesis.
            print(
                f"{total_regressions} out of {total_landings} patches with risk lower than {prob} caused regressions ({total_regressions / total_landings})"
            )

            # No need to go further, since we are interested in half than average risk.
            if (
                total_regressions / total_landings
                >= (average_regression_rate / 2) + 0.01
            ):
                max_band1_prob = prob
                break

        print("\n\n")

        # Step 3. Define risk band 3 (double than average risk).
        min_band3_prob = 0.0
        total_landings = 0
        total_regressions = 0
        results.sort(key=lambda x: x[0], reverse=True)
        for prob, is_reg in results:
            total_landings += 1
            if is_reg:
                total_regressions += 1

            if total_landings < MIN_SAMPLE:
                continue

            # Fix: the printed message was missing its closing parenthesis.
            print(
                f"{total_regressions} out of {total_landings} patches with risk higher than {prob} caused regressions ({total_regressions / total_landings})"
            )

            # No need to go further, since we are interested in double than average risk.
            if (
                total_regressions / total_landings
                <= (average_regression_rate * 2) - 0.01
            ):
                min_band3_prob = prob
                break

        print("\n\n")

        # Step 4. Define risk band 2 (average risk): scan candidate
        # [prob_start, prob_end] windows and print those whose empirical rate
        # falls between the band-1 and band-3 rates.
        results.sort(key=lambda x: x[0])
        for prob_start in np.arange(max_band1_prob / 2, max_band1_prob + 0.02, 0.01):
            for prob_end in np.arange(min_band3_prob - 0.02, 0.99, 0.01):
                total_landings = 0
                total_regressions = 0
                for prob, is_reg in results:
                    if prob < prob_start or prob > prob_end:
                        continue

                    total_landings += 1
                    if is_reg:
                        total_regressions += 1

                if total_landings < MIN_SAMPLE:
                    continue

                if (
                    (average_regression_rate / 2) + 0.01
                    > total_regressions / total_landings
                    > (average_regression_rate * 2) - 0.01
                ):
                    continue

                # Fix: the printed message was missing its closing parenthesis.
                print(
                    f"{total_regressions} out of {total_landings} patches with risk between {prob_start} and {prob_end} caused regressions ({total_regressions / total_landings})"
                )
Ejemplo n.º 6
0
    def get_labels(self):
        """Build regressor labels for commits, with configurable finder usage.

        Returns a tuple ``(classes, [0, 1])`` where ``classes`` maps a commit
        node to 1 if it caused a regression and to 0 otherwise. The regressor
        finder's output is used to add positives (``self.use_finder``) or to
        exclude its hits from the clean set (``self.exclude_finder``),
        optionally restricted to fixes of actual regressions
        (``self.finder_regressions_only``).
        """
        classes = {}

        if self.use_finder or self.exclude_finder:
            if self.finder_regressions_only:
                # Revisions of bug-fixing commits that fixed actual regressions.
                regression_fixes = set(
                    bug_fixing_commit["rev"]
                    for bug_fixing_commit in db.read(BUG_FIXING_COMMITS_DB)
                    if bug_fixing_commit["type"] == "r"
                )

            # NOTE: regression_fixes is only defined when finder_regressions_only
            # is set; the `not self.finder_regressions_only or` short-circuit
            # below guarantees it is never referenced otherwise.
            regressors = set(
                r["bug_introducing_rev"]
                for r in db.read(BUG_INTRODUCING_COMMITS_DB)
                if r["bug_introducing_rev"]
                and (
                    not self.finder_regressions_only
                    or r["bug_fixing_rev"] in regression_fixes
                )
            )

        # Bugs referenced by other bugs' "regressed_by" field.
        regressor_bugs = set(
            sum((bug["regressed_by"] for bug in bugzilla.get_bugs()), [])
        )

        # Hoisted out of the loop: a single reference time keeps the cutoffs
        # consistent for every commit (datetime.utcnow() would otherwise drift
        # slightly over a long iteration) and avoids recomputing the
        # relativedelta arithmetic per commit.
        now = datetime.utcnow()
        evaluation_cutoff = now - relativedelta(months=EVALUATION_MONTHS)
        oldest_label_cutoff = now - relativedelta(years=2)
        recent_cutoff = now - relativedelta(months=3)

        for commit_data in repository.get_commits():
            if commit_data["backedoutby"]:
                continue

            if repository.is_wptsync(commit_data):
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])

            # Skip commits used for the evaluation phase.
            if push_date > evaluation_cutoff:
                continue

            node = commit_data["node"]
            if commit_data["bug_id"] in regressor_bugs or (
                self.use_finder and node in regressors
            ):
                classes[node] = 1
            elif not self.exclude_finder or node not in regressors:
                # The labels we have are only from two years ago (see https://groups.google.com/g/mozilla.dev.platform/c/SjjW6_O-FqM/m/G-CrIVT2BAAJ).
                # While we can go further back with the regressor finder script, it isn't remotely
                # as precise as the "Regressed By" data.
                # In the future, we might want to re-evaluate this limit (e.g. extend ), but we
                # have to be careful (using too old patches might cause worse results as patch
                # characteristics evolve over time).
                if push_date < oldest_label_cutoff:
                    continue

                # We remove the last 3 months, as there could be regressions which haven't been
                # filed yet. While it is true that some regressions might not be found for a long
                # time, more than 3 months seems overly conservative.
                # There will be some patches we currently add to the clean set and will later move
                # to the regressor set, but they are a very small subset.
                if push_date > recent_cutoff:
                    continue

                classes[node] = 0

        print(
            "{} commits caused regressions".format(
                sum(1 for label in classes.values() if label == 1)
            )
        )

        print(
            "{} commits did not cause regressions".format(
                sum(1 for label in classes.values() if label == 0)
            )
        )

        return classes, [0, 1]