Example #1
0
    def eval_regression(self):
        """Decide for every bug whether it is a regression and dump the result.

        Bugs accepted by ``self.is_regression`` are recorded as ``True`` and
        bugs accepted by ``self.is_feature`` as ``False``. Any other bug is
        recorded as ``True`` only when the regression model predicts class
        ``1``. The mapping of bug ID to verdict is written to
        ``regression.json``.
        """
        verdicts = {}

        classifier = RegressionModel.load('regressionmodel')
        for bug in bugzilla.get_bugs():
            if self.is_regression(bug):
                verdict = True
            elif self.is_feature(bug):
                verdict = False
            else:
                # Neither rule matched: defer to the model. bool() keeps the
                # stored value a plain Python bool.
                verdict = bool(classifier.classify(bug)[0] == 1)
            verdicts[bug['id']] = verdict

        with open('regression.json', 'w') as out:
            json.dump(verdicts, out)
Example #2
0
    def eval_regression(self):
        """Write a bug-ID -> is-regression mapping to ``regression.json``.

        The rule-based checks (``self.is_regression``, then
        ``self.is_feature``) take precedence; the regression model is only
        consulted for bugs that neither check recognises, and a predicted
        class of ``1`` is stored as ``True``.
        """
        results = {}

        regression_model = RegressionModel.load('regressionmodel')

        def looks_like_regression(bug):
            # Explicit rules win over the model's prediction.
            if self.is_regression(bug):
                return True
            if self.is_feature(bug):
                return False
            return bool(regression_model.classify(bug)[0] == 1)

        for bug in bugzilla.get_bugs():
            outcome = looks_like_regression(bug)
            results[bug['id']] = outcome

        with open('regression.json', 'w') as json_file:
            json.dump(results, json_file)
Example #3
0
 def __init__(self):
     """Initialise the base class, load the regression model and start
     with an empty ``autofix_regression`` list.
     """
     super(Regression, self).__init__()
     # Resolve the model location first, then load the model from it.
     model_location = self.retrieve_model('regression')
     self.model = RegressionModel.load(model_location)
     self.autofix_regression = []
Example #4
0
    def find_bug_fixing_commits(self):
        """Classify recently pushed commits by the kind of bug they fix.

        The commits, bugs and previous-classification databases are
        re-downloaded when the local copy is outdated or missing. Commits
        pushed between ``RELATIVE_START_DATE`` and ``RELATIVE_END_DATE`` ago
        that were not classified by an earlier run are grouped by bug ID, and
        every commit of a bug receives one of these types:

        * ``"r"`` -- the bug is labelled, or predicted, as a regression;
        * ``"d"`` -- the bug is labelled, or predicted, as a defect;
        * ``"e"`` -- anything else.

        Known labels are preferred over model predictions. The new
        classifications are appended to ``BUG_FIXING_COMMITS_DB``, which is
        then compressed.

        Returns:
            A list of ``{"rev": ..., "type": ...}`` dicts made of the earlier
            classifications followed by the new ones, restricted to the
            ``"r"`` and ``"d"`` types.

        Raises:
            AssertionError: If no unclassified commit falls inside the window.
        """

        def refresh_db(path):
            # Fetch a fresh copy when the local one is outdated or absent.
            if db.is_old_version(path) or not db.exists(path):
                db.download(path, force=True)

        logger.info("Downloading commits database...")
        refresh_db(repository.COMMITS_DB)

        logger.info("Downloading bugs database...")
        refresh_db(bugzilla.BUGS_DB)

        logger.info("Download previous classifications...")
        refresh_db(BUG_FIXING_COMMITS_DB)

        logger.info("Get previously classified commits...")
        prev_bug_fixing_commits = list(db.read(BUG_FIXING_COMMITS_DB))
        prev_bug_fixing_commits_nodes = {
            bug_fixing_commit["rev"]
            for bug_fixing_commit in prev_bug_fixing_commits
        }
        logger.info(
            f"Already classified {len(prev_bug_fixing_commits)} commits...")

        # TODO: Switch to the pure Defect model, as it's better in this case.
        logger.info("Downloading defect/enhancement/task model...")
        download_model("defectenhancementtask")
        defect_model = DefectEnhancementTaskModel.load(
            "defectenhancementtaskmodel")

        logger.info("Downloading regression model...")
        download_model("regression")
        regression_model = RegressionModel.load("regressionmodel")

        # Use a single timestamp so that both ends of the window are measured
        # from the same instant.
        now = datetime.now()
        start_date = now - RELATIVE_START_DATE
        end_date = now - RELATIVE_END_DATE
        logger.info(
            f"Gathering bug IDs associated to commits (since {start_date} and up to {end_date})..."
        )

        # Maps a bug ID to the nodes of the not-yet-classified commits in the
        # window that are linked to it.
        commit_map = defaultdict(list)
        for commit in repository.get_commits():
            if commit["node"] in prev_bug_fixing_commits_nodes:
                continue

            commit_date = dateutil.parser.parse(commit["pushdate"])
            if not (start_date <= commit_date <= end_date):
                continue

            commit_map[commit["bug_id"]].append(commit["node"])

        commit_count = sum(
            len(commit_list) for commit_list in commit_map.values())
        logger.info(
            f"{commit_count} commits found, {len(commit_map)} bugs linked to commits"
        )
        assert len(commit_map) > 0

        def get_relevant_bugs():
            # Only bugs linked to the commits we care about.
            return (bug for bug in bugzilla.get_bugs()
                    if bug["id"] in commit_map)

        # Counted in a separate pass so the progress bar gets a total without
        # holding every bug in memory.
        bug_count = sum(1 for _ in get_relevant_bugs())
        logger.info(
            f"{bug_count} bugs in total, {len(commit_map) - bug_count} bugs linked to commits missing"
        )

        known_defect_labels = defect_model.get_labels()
        known_regression_labels = regression_model.get_labels()

        def bug_type(bug):
            # Return "r", "d" or "e" for the bug, preferring known labels
            # over model predictions.
            bug_id = bug["id"]

            if (bug_id in known_regression_labels
                    and known_regression_labels[bug_id] == 1):
                return "r"

            if bug_id in known_defect_labels:
                return "d" if known_defect_labels[bug_id] == "defect" else "e"

            if defect_model.classify(bug)[0] != "defect":
                return "e"

            # The regression model is only consulted for predicted defects.
            return "r" if regression_model.classify(bug)[0] == 1 else "d"

        bug_fixing_commits = []
        for bug in tqdm(get_relevant_bugs(), total=bug_count):
            type_ = bug_type(bug)
            bug_fixing_commits.extend(
                {"rev": node, "type": type_} for node in commit_map[bug["id"]])

        db.append(BUG_FIXING_COMMITS_DB, bug_fixing_commits)
        zstd_compress(BUG_FIXING_COMMITS_DB)

        all_bug_fixing_commits = prev_bug_fixing_commits + bug_fixing_commits
        return [
            bug_fixing_commit for bug_fixing_commit in all_bug_fixing_commits
            if bug_fixing_commit["type"] in ("r", "d")
        ]
Example #5
0
 def train_regression(self):
     """Train a new ``RegressionModel``, then pass ``"regressionmodel"``
     to ``self.compress_file``.
     """
     logger.info("Training *regression vs non-regression* model")
     regression_model = RegressionModel()
     regression_model.train()
     self.compress_file("regressionmodel")
Example #6
0
 def train_regression(self):
     """Run training for the regression model and compress its output file."""
     logger.info("Training *regression vs non-regression* model")
     trainer = RegressionModel()
     trainer.train()
     # NOTE(review): "regressionmodel" is presumably the file written by
     # train() -- confirm against RegressionModel.
     self.compress_file("regressionmodel")
Example #7
0
parser.add_argument(
    "--goal",
    help="Goal of the labeler",
    choices=["str", "regressionrange"],
    default="str",
)
args = parser.parse_args()

# Load the model matching the requested goal. The imports are kept local so
# that only the selected model's module gets imported.
if args.goal == "str":
    from bugbug.models.bug import BugModel

    model = BugModel.load("bugmodel")
elif args.goal == "regressionrange":
    from bugbug.models.regression import RegressionModel

    model = RegressionModel.load("regressionmodel")

file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv")

# The csv module expects its file to be opened with newline="" so that
# newlines embedded in quoted fields are parsed correctly.
with open(file_path, "r", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # Skip the first row (presumably the header).
    labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader]

# Pairs made of the first two columns of every row that is already labeled.
already_done = {(c[0], c[1]) for c in labeled_comments}

bugs = []
for bug in bugzilla.get_bugs():
    # For the str and regressionrange problems, we don't care about test failures,
    if ("intermittent-failure" in bug["keywords"]
            or "stockwell" in bug["whiteboard"]
Example #8
0
parser.add_argument(
    "--goal",
    help="Goal of the labeler",
    choices=["str", "regressionrange"],
    default="str",
)
args = parser.parse_args()

# Each goal has its own model; import and load only the one that was asked
# for.
if args.goal == "str":
    from bugbug.models.bug import BugModel

    model = BugModel.load("bugmodel")
elif args.goal == "regressionrange":
    from bugbug.models.regression import RegressionModel

    model = RegressionModel.load("regressionmodel")

# Labels for a goal are stored in bugbug/labels/<goal>.csv.
file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv")

# newline="" lets the csv module do its own newline handling, as its
# documentation requires for files passed to csv.reader.
with open(file_path, "r", newline="") as f:
    reader = csv.reader(f)
    # Drop the first row before parsing (presumably the column header).
    next(reader)
    labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader]

# Set of (first column, second column) pairs that already carry a label.
already_done = {(c[0], c[1]) for c in labeled_comments}

bugs = []
for bug in bugzilla.get_bugs():
    # For the str and regressionrange problems, we don't care about test failures,
    if (
        "intermittent-failure" in bug["keywords"]
Example #9
0
def test_get_regression_labels():
    """Check the labels ``RegressionModel`` reports for two known bug IDs."""
    # get_labels() returns a pair; only its first element is checked here.
    labels, _ = RegressionModel().get_labels()
    assert labels[447581] == 0
    assert labels[518272] == 1
Example #10
0
 def __init__(self):
     """Initialise the base class, load the regression model and start
     with an empty ``autofix_regression`` list.
     """
     super().__init__()
     # Resolve the model location first, then load the model from it.
     model_location = self.retrieve_model()
     self.model = RegressionModel.load(model_location)
     self.autofix_regression = []