コード例 #1
0
ファイル: model.py プロジェクト: Felihong/bugbug
    def items_gen(self, classes):
        """Yield ``(commit, label)`` pairs for the commits listed in ``classes``.

        ``classes`` maps commit nodes to labels. When ``self.bug_data`` is
        truthy, every yielded commit also receives a ``"bug"`` entry holding
        the bug whose id matches the commit's ``bug_id`` (an empty dict when
        no such bug is known).
        """
        bug_map = None

        if self.bug_data:
            # Only the bugs referenced by labelled commits are worth keeping.
            wanted_bug_ids = {
                item["bug_id"]
                for item in repository.get_commits()
                if item["node"] in classes
            }

            bug_map = {
                entry["id"]: entry
                for entry in bugzilla.get_bugs()
                if entry["id"] in wanted_bug_ids
            }

            assert len(bug_map) > 0

        for commit in repository.get_commits():
            node = commit["node"]
            if node in classes:
                if self.bug_data:
                    commit["bug"] = bug_map.get(commit["bug_id"], {})

                yield commit, classes[node]
コード例 #2
0
ファイル: boot.py プロジェクト: ahal/bugbug
def boot_worker():
    """Prepare a worker: preload models, clone the repository, refresh the DBs.

    The repository is cloned into the directory named by the
    ``BUGBUG_REPO_DIR`` environment variable, or into ``bugbug-hg`` under the
    system temporary directory when the variable is not set.

    Raises:
        AssertionError: if a download fails while ``ALLOW_MISSING_MODELS`` is
            falsy.
    """
    # Preload models
    bugbug_http.models.preload_models()

    # Clone mozilla central
    repo_dir = os.environ.get(
        "BUGBUG_REPO_DIR", os.path.join(tempfile.gettempdir(), "bugbug-hg")
    )
    logger.info(f"Cloning mozilla-central in {repo_dir}...")
    repository.clone(repo_dir)

    # Download databases.
    # The download is performed outside of the `assert` statement so that it
    # still happens when assertions are stripped (`python -O`).
    logger.info("Downloading test scheduling DB support file...")
    support_file_downloaded = db.download_support_file(
        test_scheduling.TEST_LABEL_SCHEDULING_DB,
        test_scheduling.PAST_FAILURES_LABEL_DB,
    )
    assert support_file_downloaded or ALLOW_MISSING_MODELS

    # Download commits DB
    logger.info("Downloading commits DB...")
    commits_db_downloaded = db.download(repository.COMMITS_DB, support_files_too=True)
    if not ALLOW_MISSING_MODELS:
        assert commits_db_downloaded

    if commits_db_downloaded:
        # And update it
        logger.info("Browsing all commits...")
        # Exhaust the iterator only to keep a reference to the last commit.
        # NOTE(review): `commit` stays unbound (NameError below) if the DB
        # yields no commit at all - confirm this cannot happen here.
        for commit in repository.get_commits():
            pass

        rev_start = "children({})".format(commit["node"])
        logger.info("Updating commits DB...")
        repository.download_commits(repo_dir, rev_start)

    logger.info("Worker boot done")
コード例 #3
0
    def get_landed_and_filed_since(self, days: int) -> List[int]:
        """Return the IDs of bugs with relevant recent commits or in the recent timespan.

        A commit pushed within the last ``days`` days is retained when its bug
        has no earlier commit, when the previous commit of the same bug was
        pushed less than 91 days before, or when at least one of its files is
        not a test file according to ``repository.is_test``. The bug IDs of
        the retained commits are merged with the IDs returned by
        ``bugzilla.get_ids_between``.
        """
        since = datetime.utcnow() - timedelta(days=days)

        landed_bug_ids = set()
        last_commit_by_bug: Dict[int, datetime] = {}
        for commit in repository.get_commits():
            bug_id = commit["bug_id"]
            if not bug_id:
                continue

            push_date = dateutil.parser.parse(commit["pushdate"])

            if push_date >= since and (
                bug_id not in last_commit_by_bug
                or push_date - last_commit_by_bug[bug_id] < timedelta(days=91)
                or any(not repository.is_test(path) for path in commit["files"])
            ):
                landed_bug_ids.add(bug_id)

            # Remember the most recently seen push date for this bug.
            last_commit_by_bug[bug_id] = push_date

        logger.info(f"Retrieving bug IDs since {days} days ago")
        timespan_ids = bugzilla.get_ids_between(since, resolution=["---", "FIXED"])

        return list(landed_bug_ids | set(timespan_ids))
コード例 #4
0
    def get_landed_and_filed_since(self, days: int) -> List[int]:
        """Return the IDs of bugs which had commits or were filed in the last ``days`` days.

        Bugs filed in the timespan are skipped when their resolution is one of
        INVALID, WONTFIX, INACTIVE, DUPLICATE, INCOMPLETE, MOVED or WORKSFORME.
        """
        since = datetime.utcnow() - timedelta(days=days)

        # Bugs linked to commits pushed in the timespan.
        bug_ids = set()
        for commit in repository.get_commits():
            if dateutil.parser.parse(commit["pushdate"]) >= since and commit["bug_id"]:
                bug_ids.add(commit["bug_id"])

        logger.info(f"Retrieving bug IDs since {days} days ago")
        timespan_ids = bugzilla.get_ids_between(since, datetime.utcnow())
        bugzilla.download_bugs(timespan_ids)

        excluded_resolutions = (
            "INVALID",
            "WONTFIX",
            "INACTIVE",
            "DUPLICATE",
            "INCOMPLETE",
            "MOVED",
            "WORKSFORME",
        )

        # Bugs created in the timespan, compared as naive datetimes.
        for bug in bugzilla.get_bugs():
            created = dateutil.parser.parse(bug["creation_time"]).replace(tzinfo=None)
            if created >= since and bug["resolution"] not in excluded_resolutions:
                bug_ids.add(bug["id"])

        return list(bug_ids)
コード例 #5
0
    def __init__(self, repo_dir: str) -> None:
        """Bring the repository and the local databases up to date.

        Args:
            repo_dir: path of the local repository; it is cloned when the path
                does not exist and pulled otherwise.

        Raises:
            AssertionError: if one of the database downloads reports failure.
        """
        if not os.path.exists(repo_dir):
            repository.clone(repo_dir)
        else:
            repository.pull(repo_dir, "mozilla-central", "tip")

        # Downloads are executed outside of the `assert` statements so that
        # they still run when assertions are stripped (`python -O`).
        logger.info("Downloading commits database...")
        commits_db_downloaded = db.download(
            repository.COMMITS_DB, support_files_too=True
        )
        assert commits_db_downloaded

        logger.info("Updating commits DB...")
        # Exhaust the iterator only to keep a reference to the last commit.
        # NOTE(review): `commit` stays unbound (NameError below) if the DB
        # yields no commit at all - confirm this cannot happen here.
        for commit in repository.get_commits():
            pass

        repository.download_commits(
            repo_dir,
            rev_start="children({})".format(commit["node"]),
        )

        logger.info("Downloading revisions database...")
        revisions_db_downloaded = db.download(phabricator.REVISIONS_DB)
        assert revisions_db_downloaded

        logger.info("Downloading bugs database...")
        bugs_db_downloaded = db.download(bugzilla.BUGS_DB)
        assert bugs_db_downloaded

        phabricator.set_api_key(
            get_secret("PHABRICATOR_URL"), get_secret("PHABRICATOR_TOKEN")
        )
コード例 #6
0
ファイル: backout.py プロジェクト: mythmon/bugbug
    def get_labels(self):
        """Label every commit: 1 when it was ever backed out, 0 otherwise.

        Returns a tuple made of the node -> label mapping and the list of
        possible labels.
        """
        classes = {
            entry["node"]: 1 if entry["ever_backedout"] else 0
            for entry in repository.get_commits()
        }

        return classes, [0, 1]
コード例 #7
0
ファイル: regressor.py プロジェクト: x249wang/bugbug
    def get_labels(self):
        """Label commits as regressors (1) or non-regressors (0).

        Commits which were ever backed out are skipped. Commits listed in the
        "regressor" labels get label 1. Other commits get label 0 only when
        they were pushed on or after 2016-11-01 and not within the last six
        months.

        Returns a tuple made of the node -> label mapping and the list of
        possible labels.
        """
        regressors = {entry[0] for entry in labels.get_labels("regressor")}

        classes = {}
        for commit_data in repository.get_commits():
            if commit_data["ever_backedout"]:
                continue

            node = commit_data["node"]
            if node in regressors:
                classes[node] = 1
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])

            # The labels we have are only from 2016-11-01.
            # TODO: Automate collection of labels and somehow remove this check.
            # The last 6 months are removed too, as there could be regressions
            # which haven't been filed yet.
            if push_date < datetime(
                2016, 11, 1
            ) or push_date > datetime.utcnow() - relativedelta(months=6):
                continue

            classes[node] = 0

        regressing = sum(1 for label in classes.values() if label == 1)
        print("{} commits caused regressions".format(regressing))

        not_regressing = sum(1 for label in classes.values() if label == 0)
        print("{} commits did not cause regressions".format(not_regressing))

        return classes, [0, 1]
コード例 #8
0
ファイル: testfailure.py プロジェクト: mvkski/bugbug
    def items_gen(self, classes):
        """Yield ``(merged commit data, label)`` for labelled test scheduling entries.

        Each history entry is identified by its first revision. An entry is
        processed at most once, and only when that revision is in ``classes``
        and at least one of its revisions is in the commits DB.
        """
        commit_map = {item["node"]: item for item in repository.get_commits()}

        assert len(commit_map) > 0

        handled = set()
        for test_data in test_scheduling.get_test_scheduling_history("label"):
            revs = test_data["revs"]
            first_rev = revs[0]

            if first_rev in handled or first_rev not in classes:
                continue

            handled.add(first_rev)

            # Revisions missing from the commits DB are silently dropped.
            commits = tuple(commit_map[rev] for rev in revs if rev in commit_map)
            if not commits:
                continue

            yield commit_features.merge_commits(commits), classes[first_rev]
コード例 #9
0
ファイル: model.py プロジェクト: Felihong/bugbug
    def items_gen(self, classes):
        """Yield ``(bug, label)`` pairs for the bugs listed in ``classes``.

        ``classes`` maps bug ids to labels. When ``self.commit_data`` is
        truthy, every yielded bug also receives a ``"commits"`` entry holding
        the list of commits linked to it (an empty list when there are none).
        """
        commit_map = None

        if self.commit_data:
            # Group commits by the bug they are linked to.
            commit_map = defaultdict(list)
            for commit in repository.get_commits():
                if commit["bug_id"]:
                    commit_map[commit["bug_id"]].append(commit)

            assert len(commit_map) > 0

        for bug in bugzilla.get_bugs():
            bug_id = bug["id"]
            if bug_id in classes:
                if self.commit_data:
                    # `.get` avoids inserting empty entries in the defaultdict.
                    bug["commits"] = commit_map.get(bug_id, [])

                yield bug, classes[bug_id]
コード例 #10
0
ファイル: backout.py プロジェクト: yuyan9/bugbug
    def get_labels(self):
        """Label commits of the last two years and six months by backout status.

        A commit gets label 1 when its ``backedoutby`` field is truthy and 0
        otherwise; older commits are left out.

        Returns a tuple made of the node -> label mapping and the list of
        possible labels.
        """
        cutoff = datetime.utcnow() - relativedelta(years=2, months=6)

        classes = {}
        for commit_data in repository.get_commits():
            if dateutil.parser.parse(commit_data["pushdate"]) >= cutoff:
                label = 1 if commit_data["backedoutby"] else 0
                classes[commit_data["node"]] = label

        backed_out = sum(1 for label in classes.values() if label == 1)
        print("{} commits were backed out".format(backed_out))

        not_backed_out = sum(1 for label in classes.values() if label == 0)
        print("{} commits were not backed out".format(not_backed_out))

        return classes, [0, 1]
コード例 #11
0
    def retrieve_revisions(self, limit: Optional[int] = None) -> None:
        """Download the Phabricator revisions linked to recent commits and bugs.

        Args:
            limit: when not ``None``, only the last ``limit`` revision IDs
                linked to commits are kept (revision IDs linked to bugs are
                appended afterwards regardless). Note that ``limit=0`` keeps
                every ID, because ``revision_ids[-0:]`` is the whole list.

        Raises:
            AssertionError: if the commits DB or the bugs DB cannot be
                downloaded.
        """
        phabricator.set_api_key(
            get_secret("PHABRICATOR_URL"), get_secret("PHABRICATOR_TOKEN")
        )

        # The result of this download is deliberately not checked.
        db.download(phabricator.REVISIONS_DB)

        # The downloads below run outside of the `assert` statements so that
        # they still happen when assertions are stripped (`python -O`).

        # Get the commits DB, as we need it to get the revision IDs linked to recent commits.
        commits_db_downloaded = db.download(repository.COMMITS_DB)
        assert commits_db_downloaded

        # Get the bugs DB, as we need it to get the revision IDs linked to bugs.
        bugs_db_downloaded = db.download(bugzilla.BUGS_DB)
        assert bugs_db_downloaded

        # Get IDs of revisions linked to commits since a year ago.
        start_date = datetime.utcnow() - relativedelta(years=1)
        revision_ids = list(
            filter(
                None,
                (
                    repository.get_revision_id(commit)
                    for commit in repository.get_commits()
                    if dateutil.parser.parse(commit["pushdate"]) >= start_date
                ),
            )
        )
        if limit is not None:
            revision_ids = revision_ids[-limit:]

        # Get IDs of revisions linked to bugs since a year ago.
        for bug in bugzilla.get_bugs():
            created = dateutil.parser.parse(bug["creation_time"]).replace(tzinfo=None)
            if created < start_date:
                continue

            revision_ids += bugzilla.get_revision_ids(bug)

        phabricator.download_revisions(revision_ids)

        zstd_compress(phabricator.REVISIONS_DB)
コード例 #12
0
ファイル: commit_retriever.py プロジェクト: rock420/bugbug
    def retrieve_commits(self, limit):
        """Mine commits from the repository and package the resulting DBs.

        When ``limit`` is truthy, mining starts ``limit`` revisions before the
        tip. Otherwise the commits DB is downloaded and mining resumes from
        the children of the last commit it contains (or from revision 0 when
        it contains none).
        """
        repository.clone(self.repo_dir)

        if not limit:
            db.download(repository.COMMITS_DB, support_files_too=True)

            # Ends up pointing right after the last commit stored in the DB.
            rev_start = 0
            for commit in repository.get_commits():
                rev_start = f"children({commit['node']})"
        else:
            # Mercurial revset supports negative integers starting from tip
            rev_start = -limit

        with hglib.open(self.repo_dir) as hg:
            revs = repository.get_revs(hg, rev_start)

        # Revisions are handed over to download_commits in fixed-size chunks.
        chunk_size = 70000
        for start in range(0, len(revs), chunk_size):
            chunk = revs[start:(start + chunk_size)]
            repository.download_commits(self.repo_dir, revs=chunk)

        logger.info("commit data extracted from repository")

        # Some commits that were already in the DB from the previous run might need
        # to be updated (e.g. coverage information).
        repository.update_commits()

        zstd_compress(repository.COMMITS_DB)
        experiences_path = os.path.join("data", repository.COMMIT_EXPERIENCES_DB)
        create_tar_zst(experiences_path)
コード例 #13
0
    def get_bugs(self, date="today", bug_ids=[]):
        """Classify the commits which are not yet in the commits DB.

        Commits which were ever backed out are left out. Returns a dict keyed
        by commit node, whose values hold the node, the first line of the
        commit description, a "Risky" / "Not risky" verdict and the
        confidence of the predicted class.

        ``date`` and ``bug_ids`` are not used by this implementation.
        """
        self.query_url = ""

        # Ignore already analyzed commits.
        # NOTE(review): `commit` stays unbound (NameError below) if the DB
        # yields no commit at all - confirm this cannot happen here.
        for commit in repository.get_commits():
            pass

        rev_start = f"children({commit['node']})"

        downloaded = repository.download_commits(self.repo_dir, rev_start, ret=True)

        commits = [item for item in downloaded if not item["ever_backedout"]]

        probs = self.model.classify(commits, True)
        indexes = probs.argmax(axis=-1)

        return {
            item["node"]: {
                "id": item["node"],
                "summary": item["desc"].split("\n", 1)[0],
                "result": "Risky" if prob[1] > 0.5 else "Not risky",
                "confidence": nice_round(prob[index]),
            }
            for item, prob, index in zip(commits, probs, indexes)
        }
コード例 #14
0
ファイル: annotate_ignore.py プロジェクト: rock420/bugbug
    def get_labels(self):
        """Label commits as ignorable (1) or not ignorable (0).

        Commits with a truthy ``backedoutby`` field are skipped. Commits
        flagged as ``ignored`` get label 1; commits linked to a bug which
        appears in the ``regressed_by`` or ``regressions`` list of some bug
        get label 0. Labels from the "annotateignore" label set are applied
        last and override the automatic ones.

        Returns a tuple made of the node -> label mapping and the list of
        possible labels.
        """
        classes = {}

        # Commits in regressor or regression bugs usually are not formatting changes.
        # A set comprehension is used instead of `set(sum(lists, []))`, which
        # re-copies the accumulated list for every bug (quadratic time).
        regression_related_bugs = {
            bug_id
            for bug in bugzilla.get_bugs()
            for bug_id in bug["regressed_by"] + bug["regressions"]
        }

        for commit_data in repository.get_commits(include_ignored=True):
            if commit_data["backedoutby"]:
                continue

            node = commit_data["node"]

            if commit_data["ignored"]:
                classes[node] = 1
            elif commit_data["bug_id"] in regression_related_bugs:
                classes[node] = 0

        for node, label in labels.get_labels("annotateignore"):
            classes[node] = int(label)

        print(
            "{} commits that can be ignored".format(
                sum(1 for label in classes.values() if label == 1)
            )
        )

        print(
            "{} commits that cannot be ignored".format(
                sum(1 for label in classes.values() if label == 0)
            )
        )

        return classes, [0, 1]
コード例 #15
0
ファイル: regressor.py プロジェクト: beatlse/bugbug
    def get_labels(self):
        """Label commits as regressors (1) or non-regressors (0).

        Commits which were ever backed out are skipped. Commits whose node is
        a ``bug_introducing_rev`` in the bug-introducing commits DB get label
        1. Other commits get label 0 only when they were pushed between two
        years and six months ago and six months ago.

        Returns a tuple made of the node -> label mapping and the list of
        possible labels.
        """
        regressors = {
            entry["bug_introducing_rev"]
            for entry in db.read(BUG_INTRODUCING_COMMITS_DB)
            if entry["bug_introducing_rev"]
        }

        classes = {}
        for commit_data in repository.get_commits():
            if commit_data["ever_backedout"]:
                continue

            node = commit_data["node"]
            if node in regressors:
                classes[node] = 1
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])

            # The labels we have are only from two years and six months ago (see the regressor finder script).
            if push_date >= datetime.utcnow() - relativedelta(years=2, months=6):
                # We remove the last 6 months, as there could be regressions which haven't been filed yet.
                if push_date <= datetime.utcnow() - relativedelta(months=6):
                    classes[node] = 0

        regressing = sum(1 for label in classes.values() if label == 1)
        print("{} commits caused regressions".format(regressing))

        not_regressing = sum(1 for label in classes.values() if label == 0)
        print("{} commits did not cause regressions".format(not_regressing))

        return classes, [0, 1]
コード例 #16
0
    def __init__(self, repo_dir: str) -> None:
        """Clone the repository, refresh the local databases and load the model.

        Args:
            repo_dir: path handed to ``repository.clone`` and
                ``repository.download_commits``.

        Raises:
            AssertionError: if one of the database downloads reports failure.
        """
        repository.clone(repo_dir)

        # Downloads are executed outside of the `assert` statements so that
        # they still run when assertions are stripped (`python -O`).
        logger.info("Downloading commits database...")
        commits_db_downloaded = db.download(
            repository.COMMITS_DB, support_files_too=True
        )
        assert commits_db_downloaded

        logger.info("Updating commits DB...")
        # Exhaust the iterator only to keep a reference to the last commit.
        # NOTE(review): `commit` stays unbound (NameError below) if the DB
        # yields no commit at all - confirm this cannot happen here.
        for commit in repository.get_commits():
            pass

        repository.download_commits(
            repo_dir,
            rev_start="children({})".format(commit["node"]),
        )

        logger.info("Downloading revisions database...")
        revisions_db_downloaded = db.download(phabricator.REVISIONS_DB)
        assert revisions_db_downloaded

        logger.info("Downloading bugs database...")
        bugs_db_downloaded = db.download(bugzilla.BUGS_DB)
        assert bugs_db_downloaded

        logger.info("Download commit classifications...")
        bug_fixing_db_downloaded = db.download(BUG_FIXING_COMMITS_DB)
        assert bug_fixing_db_downloaded

        self.regressor_model = download_and_load_model("regressor")

        bugzilla.set_token(get_secret("BUGZILLA_TOKEN"))
        phabricator.set_api_key(
            get_secret("PHABRICATOR_URL"), get_secret("PHABRICATOR_TOKEN")
        )
コード例 #17
0
    def items_gen(self, classes):
        """Yield ``(merged commit data, label)`` for labelled test scheduling entries.

        ``classes`` is keyed by ``(first revision, test name)``. Entries with
        no revision in the commits DB are skipped. The history entry itself is
        attached to the merged commit data under ``"test_job"``.
        """
        commit_map = {item["node"]: item for item in repository.get_commits()}

        assert len(commit_map) > 0

        history = test_scheduling.get_test_scheduling_history(self.granularity)
        for test_data in history:
            revs = test_data["revs"]
            name = test_data["name"]

            key = (revs[0], name)
            if key not in classes:
                continue

            # Revisions missing from the commits DB are silently dropped.
            commits = tuple(commit_map[rev] for rev in revs if rev in commit_map)
            if not commits:
                continue

            merged = commit_features.merge_commits(commits)
            merged["test_job"] = test_data
            yield merged, classes[key]
コード例 #18
0
        def generate_data():
            """Yield one failure-history record per (commit, considered task).

            Relies on names from the enclosing scope: ``push_data`` (indexed
            by commit node), ``past_failures``, ``get_past_failures``,
            ``JOBS_TO_CONSIDER`` and ``HISTORY_DATE_START``.

            NOTE(review): ``push_data[node]`` is indexed with 0, 1 and 2 below;
            presumably (all tasks, possible regressions, likely regressions) -
            confirm against the code which builds ``push_data``.
            """
            commits_with_data = set()
            saved_nodes = set()

            # Counts only the commits which have push data (see the increment
            # at the end of the loop body).
            push_num = 0
            for commit_data in tqdm(repository.get_commits()):
                node = commit_data["node"]

                if node not in push_data:
                    continue

                commits_with_data.add(node)

                commit_push_data = push_data[node]

                for task in commit_push_data[0]:
                    # Only tasks whose name starts with one of the considered
                    # job prefixes are analyzed.
                    if not any(task.startswith(j) for j in JOBS_TO_CONSIDER):
                        continue

                    # Failures in the last N pushes are obtained by
                    # subtracting the value of get_past_failures N pushes ago
                    # from the current one.
                    total_failures = get_past_failures(task, push_num)
                    past_7_pushes_failures = total_failures - get_past_failures(
                        task, push_num - 7)
                    past_14_pushes_failures = total_failures - get_past_failures(
                        task, push_num - 14)
                    past_28_pushes_failures = total_failures - get_past_failures(
                        task, push_num - 28)
                    past_56_pushes_failures = total_failures - get_past_failures(
                        task, push_num - 56)

                    # Records are only emitted for commits pushed after
                    # HISTORY_DATE_START; past_failures is still updated for
                    # every commit below.
                    pushdate = dateutil.parser.parse(commit_data["pushdate"])
                    if pushdate > HISTORY_DATE_START:
                        saved_nodes.add(node)

                        yield {
                            "rev": node,
                            "name": task,
                            "failures": total_failures,
                            "failures_past_7_pushes": past_7_pushes_failures,
                            "failures_past_14_pushes": past_14_pushes_failures,
                            "failures_past_28_pushes": past_28_pushes_failures,
                            "failures_past_56_pushes": past_56_pushes_failures,
                            "is_possible_regression": task
                            in commit_push_data[1],
                            "is_likely_regression": task
                            in commit_push_data[2],
                        }

                    # Record the new failure only after the record was built,
                    # so the yielded counts do not include the current push.
                    if task in commit_push_data[1] or task in commit_push_data[
                            2]:
                        past_failures[task][push_num] = total_failures + 1

                push_num += 1

            logger.info(f"push data nodes: {len(push_data)}")

            logger.info(
                f"commits linked to push data: {len(commits_with_data)}")

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
コード例 #19
0
ファイル: testselect.py プロジェクト: DeepikaKaranji/bugbug
def get_commit_map():
    """Return a mapping from commit node to commit for the whole commits DB.

    Raises:
        AssertionError: if the commits DB yields no commit.
    """
    commit_map = {item["node"]: item for item in repository.get_commits()}

    assert len(commit_map) > 0
    return commit_map
コード例 #20
0
ファイル: backout.py プロジェクト: rajibmitra/bugbug
    def get_labels(self):
        """Map every commit node to 1 if the commit was ever backed out, else 0.

        Returns a tuple made of that mapping and the list of possible labels.
        """
        classes = dict(
            (entry["node"], 1 if entry["ever_backedout"] else 0)
            for entry in repository.get_commits()
        )

        return classes, [0, 1]
コード例 #21
0
def boot_worker():
    """Prepare a worker: clone autoland, refresh the DBs and preload models.

    After the commits DB is updated with newly downloaded commits, the
    "touched together" DB is updated up to the last of those commits.

    Raises:
        AssertionError: if a download fails while ``ALLOW_MISSING_MODELS`` is
            falsy.
    """
    # Clone autoland
    logger.info(f"Cloning mozilla autoland in {REPO_DIR}...")
    repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

    # Download test scheduling DB support files.
    # Each download is performed outside of the `assert` statement so that it
    # still happens when assertions are stripped (`python -O`).
    logger.info("Downloading test scheduling DB support files...")
    label_failures_downloaded = db.download_support_file(
        test_scheduling.TEST_LABEL_SCHEDULING_DB,
        test_scheduling.PAST_FAILURES_LABEL_DB,
    )
    assert label_failures_downloaded or ALLOW_MISSING_MODELS

    group_failures_downloaded = db.download_support_file(
        test_scheduling.TEST_GROUP_SCHEDULING_DB,
        test_scheduling.PAST_FAILURES_GROUP_DB,
    )
    assert group_failures_downloaded or ALLOW_MISSING_MODELS

    touched_together_downloaded = db.download_support_file(
        test_scheduling.TEST_GROUP_SCHEDULING_DB,
        test_scheduling.TOUCHED_TOGETHER_DB,
    )
    assert touched_together_downloaded or ALLOW_MISSING_MODELS

    # Download commits DB
    logger.info("Downloading commits DB...")
    commits_db_downloaded = db.download(repository.COMMITS_DB, support_files_too=True)
    if not ALLOW_MISSING_MODELS:
        assert commits_db_downloaded

    if commits_db_downloaded:
        # And update it
        logger.info("Browsing all commits...")
        # Exhaust the iterator only to keep a reference to the last commit.
        # NOTE(review): `commit` stays unbound (NameError below) if the DB
        # yields no commit at all - confirm this cannot happen here.
        for commit in repository.get_commits():
            pass

        rev_start = "children({})".format(commit["node"])
        logger.info("Updating commits DB...")
        commits = repository.download_commits(
            REPO_DIR, rev_start, use_single_process=True
        )

        if len(commits) > 0:
            # Update the touched together DB.
            update_touched_together_gen = test_scheduling.update_touched_together()
            # Prime the generator before sending values to it.
            next(update_touched_together_gen)

            update_touched_together_gen.send(commits[-1]["node"])

            # Sending None asks the generator to finish, which is signalled
            # by StopIteration.
            try:
                update_touched_together_gen.send(None)
            except StopIteration:
                pass

    # Preload models
    bugbug_http.models.preload_models()

    logger.info("Worker boot done")
コード例 #22
0
    def get_commits_to_ignore(self) -> None:
        """Build, store and upload the list of commits to ignore.

        A commit is ignored when it is flagged as ignored, was backed out, has
        no bug, backs out other commits, is a WPT sync commit according to
        ``repository.is_wptsync``, or is labelled "1" in the "annotateignore"
        labels. Commits backed out by another commit are ignored as well, as
        long as they exist in the commits DB.

        Raises:
            AssertionError: if the commits DB cannot be downloaded.
        """
        # The download runs outside of the `assert` statement so that it still
        # happens when assertions are stripped (`python -O`).
        commits_db_downloaded = db.download(repository.COMMITS_DB)
        assert commits_db_downloaded

        # Short (12 characters) hashes of the commits already in the list.
        ignored = set()
        commits_to_ignore = []
        # Short (12 characters) hashes of every commit in the DB.
        all_commits = set()

        annotate_ignore_nodes = {
            node
            for node, label in labels.get_labels("annotateignore")
            if label == "1"
        }

        for commit in repository.get_commits(
            include_no_bug=True, include_backouts=True, include_ignored=True
        ):
            all_commits.add(commit["node"][:12])

            if (
                commit["ignored"]
                or commit["backedoutby"]
                or not commit["bug_id"]
                or len(commit["backsout"]) > 0
                or repository.is_wptsync(commit)
                or commit["node"] in annotate_ignore_nodes
            ):
                commits_to_ignore.append(
                    {
                        "rev": commit["node"],
                        "type": "backedout" if commit["backedoutby"] else "",
                    }
                )
                ignored.add(commit["node"][:12])

            # Also ignore the commits which this commit backs out.
            for backedout in commit["backsout"]:
                if backedout[:12] in ignored:
                    continue
                ignored.add(backedout[:12])

                commits_to_ignore.append({"rev": backedout, "type": "backedout"})

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        # Skip backed-out commits which aren't in the repository (commits which landed *before* the Mercurial history
        # started, and backouts which mentioned a bad hash in their message).
        commits_to_ignore = [
            c for c in commits_to_ignore if c["rev"][:12] in all_commits
        ]

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        logger.info(
            "...of which {} are backed-out".format(
                sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
            )
        )

        db.write(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)
        db.upload(IGNORED_COMMITS_DB)
コード例 #23
0
    def get_landed_since(self, days: int) -> List[int]:
        """Return the bug IDs of the commits pushed within the last ``days`` days.

        Commits without a bug ID are left out; a bug ID appears once per
        matching commit.
        """
        since = datetime.utcnow() - timedelta(days=days)

        return [
            commit["bug_id"]
            for commit in repository.get_commits()
            if dateutil.parser.parse(commit["pushdate"]) >= since and commit["bug_id"]
        ]
コード例 #24
0
    def get_landed_since(self, days_start: int,
                         days_end: int) -> Collection[repository.CommitDict]:
        """Return the commits pushed between ``days_start`` and ``days_end`` days ago.

        Both bounds are inclusive. Commits without a bug, backouts and ignored
        commits are requested from the commits DB too.
        """
        since = datetime.utcnow() - timedelta(days=days_start)
        until = datetime.utcnow() - timedelta(days=days_end)

        landed = []
        for commit in repository.get_commits(
            include_no_bug=True, include_backouts=True, include_ignored=True
        ):
            if since <= dateutil.parser.parse(commit["pushdate"]) <= until:
                landed.append(commit)

        return landed
コード例 #25
0
    def update_commit_db(self):
        """Update the clone, then add the commits missing from the commits DB.

        Raises:
            AssertionError: if the commits DB cannot be downloaded.
        """
        repository.clone(self.repo_dir, update=True)

        # The download runs outside of the `assert` statement so that it still
        # happens when assertions are stripped (`python -O`).
        commits_db_downloaded = db.download(
            repository.COMMITS_DB, support_files_too=True
        )
        assert commits_db_downloaded

        # Exhaust the iterator only to keep a reference to the last commit.
        # NOTE(review): `commit` stays unbound (NameError below) if the DB
        # yields no commit at all - confirm this cannot happen here.
        for commit in repository.get_commits():
            pass

        repository.download_commits(
            self.repo_dir, rev_start="children({})".format(commit["node"])
        )
コード例 #26
0
def update_touched_together():
    """Generator-based coroutine which updates the "touched together" DB.

    Protocol, as implemented below: prime the generator with ``next()``, then
    ``send()`` the revision up to which commits must be analyzed. The
    generator yields again once that revision has been processed and waits
    for the next end revision; sending ``None`` makes it stop and close the
    DB.

    Commits are analyzed starting after the ``last_analyzed`` revision stored
    in the DB by a previous run (or from the first commit when there is none).
    """
    touched_together = get_touched_together_db()
    last_analyzed = (
        touched_together["last_analyzed"]
        if "last_analyzed" in touched_together
        else None
    )

    # We can start once we get to the last revision we added in the previous run.
    can_start = last_analyzed is None

    seen = set()

    end_revision = yield

    analyzed = 0

    for commit in repository.get_commits():
        seen.add(commit["node"])

        if can_start:
            touched_together["last_analyzed"] = commit["node"]

            # As in the test scheduling history retriever script, for now skip commits which are too large.
            if len(commit["files"]) <= 50 and not commit["ever_backedout"]:
                # Number of times a source file was touched together with a directory.
                for f1 in commit["files"]:
                    for d2 in set(
                        os.path.dirname(f) for f in commit["files"] if f != f1
                    ):
                        set_touched_together(f1, d2)

                # Number of times a directory was touched together with another directory.
                for d1, d2 in itertools.combinations(
                    list(set(os.path.dirname(f) for f in commit["files"])), 2
                ):
                    set_touched_together(d1, d2)

                analyzed += 1
                # Flush once every 5000 analyzed commits. The previous
                # condition (`i % 5000`) was truthy for every count which is
                # NOT a multiple of 5000, i.e. it synced on almost every
                # commit.
                if analyzed % 5000 == 0:
                    touched_together.sync()
        elif last_analyzed == commit["node"]:
            can_start = True

        if commit["node"] == end_revision:
            # Some commits could be in slightly different order between mozilla-central and autoland.
            # It's a small detail that shouldn't affect the features, but we need to take it into account.
            while end_revision in seen:
                end_revision = yield

            if end_revision is None:
                break

    touched_together.close()
コード例 #27
0
ファイル: regressor.py プロジェクト: Aayush-hub/bugbug
    def get_labels(self):
        """Label commits as regressors (1) or non-regressors (0).

        Backed-out commits, WPT sync commits (according to
        ``repository.is_wptsync``) and commits pushed within the last
        ``EVALUATION_MONTHS`` months are skipped. A commit gets label 1 when
        its node is a ``bug_introducing_rev`` in the bug-introducing commits
        DB or when its bug is listed in the ``regressed_by`` field of some
        bug. Other commits get label 0 only when they were pushed between two
        years and six months ago and six months ago.

        Returns a tuple made of the node -> label mapping and the list of
        possible labels.
        """
        classes = {}

        regressors = set(
            r["bug_introducing_rev"]
            for r in db.read(BUG_INTRODUCING_COMMITS_DB)
            if r["bug_introducing_rev"]
        )

        # A set comprehension is used instead of `set(sum(lists, []))`, which
        # re-copies the accumulated list for every bug (quadratic time).
        regressor_bugs = {
            bug_id for bug in bugzilla.get_bugs() for bug_id in bug["regressed_by"]
        }

        for commit_data in repository.get_commits():
            if commit_data["backedoutby"]:
                continue

            if repository.is_wptsync(commit_data):
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])

            # Skip commits used for the evaluation phase.
            if push_date > datetime.utcnow() - relativedelta(months=EVALUATION_MONTHS):
                continue

            node = commit_data["node"]
            if node in regressors or commit_data["bug_id"] in regressor_bugs:
                classes[node] = 1
            else:
                # The labels we have are only from two years and six months ago (see the regressor finder script).
                if push_date < datetime.utcnow() - relativedelta(years=2, months=6):
                    continue

                # We remove the last 6 months, as there could be regressions which haven't been filed yet.
                if push_date > datetime.utcnow() - relativedelta(months=6):
                    continue

                classes[node] = 0

        print(
            "{} commits caused regressions".format(
                sum(1 for label in classes.values() if label == 1)
            )
        )

        print(
            "{} commits did not cause regressions".format(
                sum(1 for label in classes.values() if label == 0)
            )
        )

        return classes, [0, 1]
コード例 #28
0
    def classify(
        self,
        revision=None,
        phabricator_deployment=None,
        diff_id=None,
        runnable_jobs_path=None,
    ):
        """Classify either a repository revision or a Phabricator diff.

        ``revision`` is mutually exclusive with ``phabricator_deployment`` /
        ``diff_id``; ``diff_id`` requires ``phabricator_deployment``.

        Raises:
            AssertionError: on an invalid argument combination, or when no
                commit could be found to analyze.
        """
        if revision is not None:
            assert phabricator_deployment is None
            assert diff_id is None

        if diff_id is not None:
            assert phabricator_deployment is not None
            assert revision is None

        self.update_commit_db()

        analyzing_diff = phabricator_deployment is not None and diff_id is not None

        if not analyzing_diff:
            # Look for the revision among the commits already in the DB.
            known_commit = next(
                (
                    candidate
                    for candidate in repository.get_commits()
                    if candidate["node"] == revision
                ),
                None,
            )

            if known_commit is not None:
                commits = [known_commit]
            else:
                # The commit to analyze was not in our DB, let's mine it.
                commits = repository.download_commits(
                    self.repo_dir,
                    revs=[revision],
                    save=False,
                    use_single_process=self.use_single_process,
                )
        else:
            with hglib.open(self.repo_dir) as hg:
                self.apply_phab(hg, phabricator_deployment, diff_id)

                # First non-public changeset after applying the patch.
                non_public = hg.log(revrange="not public()")
                revision = non_public[0].node.decode("utf-8")

            commits = repository.download_commits(
                self.repo_dir,
                rev_start=revision,
                save=False,
                use_single_process=self.use_single_process,
            )

        assert len(commits) > 0, "There are no commits to analyze"

        if self.use_test_history:
            self.classify_test_select(commits, runnable_jobs_path)
        else:
            self.classify_regressor(commits)
コード例 #29
0
ファイル: testselect.py プロジェクト: pdehaan/bugbug
def get_commit_map(
    revs: Optional[Set[test_scheduling.Revision]] = None,
) -> Dict[test_scheduling.Revision, repository.CommitDict]:
    """Return a mapping from commit node to commit.

    Args:
        revs: when not ``None``, only the commits whose node is in this set
            are kept.

    Raises:
        AssertionError: if no commit ends up in the mapping.
    """
    commit_map = {
        item["node"]: item
        for item in repository.get_commits()
        if revs is None or item["node"] in revs
    }

    assert len(commit_map) > 0
    return commit_map
コード例 #30
0
    def get_labels(self):
        """Label every commit by backout status and print the label counts.

        A commit gets label 1 when it was ever backed out and 0 otherwise.

        Returns a tuple made of the node -> label mapping and the list of
        possible labels.
        """
        classes = {
            entry["node"]: 1 if entry["ever_backedout"] else 0
            for entry in repository.get_commits()
        }

        backed_out = sum(1 for label in classes.values() if label == 1)
        print("{} commits were backed out".format(backed_out))

        not_backed_out = sum(1 for label in classes.values() if label == 0)
        print("{} commits were not backed out".format(not_backed_out))

        return classes, [0, 1]
コード例 #31
0
    def update_commit_db(self):
        """Clone the repository and add the commits missing from the commits DB.

        The commits DB is (re-)downloaded first when the local copy is an old
        version or does not exist.
        """
        repository.clone(self.repo_dir)

        needs_download = db.is_old_version(repository.COMMITS_DB) or not db.exists(
            repository.COMMITS_DB
        )
        if needs_download:
            db.download(repository.COMMITS_DB, force=True, support_files_too=True)

        # Exhaust the iterator only to keep a reference to the last commit.
        # NOTE(review): `commit` stays unbound (NameError below) if the DB
        # yields no commit at all - confirm this cannot happen here.
        for commit in repository.get_commits():
            pass

        repository.download_commits(
            self.repo_dir, "children({})".format(commit["node"])
        )
コード例 #32
0
ファイル: bug_features.py プロジェクト: mythmon/bugbug
def get_author_ids():
    """Return the set of author emails found in the commits DB."""
    return {commit["author_email"] for commit in repository.get_commits()}