def eval_regression(self):
    """Label every bug as regression or not and write the labels to disk.

    A bug accepted by ``self.is_regression`` is labelled ``True``; otherwise
    a bug accepted by ``self.is_feature`` is labelled ``False``. Any other
    bug is labelled by the loaded regression model, where a first
    classification result equal to ``1`` means ``True``.

    The mapping of bug ID to label is written to ``regression.json``.
    """
    verdicts = {}
    classifier = RegressionModel.load('regressionmodel')

    def is_regression_bug(candidate):
        # The explicit checks take priority; the model is only the fallback.
        if self.is_regression(candidate):
            return True
        if self.is_feature(candidate):
            return False
        # Coerce to a plain bool so the value stays JSON-serializable.
        return bool(classifier.classify(candidate)[0] == 1)

    for bug in bugzilla.get_bugs():
        verdicts[bug['id']] = is_regression_bug(bug)

    with open('regression.json', 'w') as out_file:
        json.dump(verdicts, out_file)
def eval_regression(self):
    """Compute a regression flag for each bug and dump the flags as JSON.

    Precedence per bug: ``self.is_regression`` gives ``True``, then
    ``self.is_feature`` gives ``False``, and otherwise the loaded regression
    model decides (``True`` when its first classification result equals
    ``1``).

    The resulting ``{bug id: flag}`` mapping is written to
    ``regression.json``.
    """
    flags = {}
    regression_model = RegressionModel.load('regressionmodel')

    for bug in bugzilla.get_bugs():
        if self.is_regression(bug):
            flag = True
        elif self.is_feature(bug):
            flag = False
        else:
            # Fall back to the model; bool() keeps the value a plain
            # Python boolean for serialization.
            prediction = regression_model.classify(bug)[0]
            flag = bool(prediction == 1)
        flags[bug['id']] = flag

    with open('regression.json', 'w') as handle:
        json.dump(flags, handle)
def __init__(self):
    """Set up the parent class, load the regression model, reset autofix state.

    The model is loaded from whatever ``self.retrieve_model('regression')``
    returns, and ``self.autofix_regression`` starts out as an empty list.
    """
    super(Regression, self).__init__()

    model_location = self.retrieve_model('regression')
    self.model = RegressionModel.load(model_location)

    self.autofix_regression = []
def find_bug_fixing_commits(self):
    """Classify not-yet-classified commits by the kind of bug they are linked to.

    Steps:

    1. Re-download the commits, bugs and bug-fixing-commits databases when
       they are missing or reported as an old version.
    2. Collect commits that are not already classified and whose push date
       lies between ``now - RELATIVE_START_DATE`` and
       ``now - RELATIVE_END_DATE``, grouped by bug ID.
    3. Give each linked bug a type: ``"r"`` (regression), ``"d"`` (defect
       that is not a regression) or ``"e"`` (anything else). Known labels
       are used when available, otherwise the models decide.
    4. Append the new classifications to the bug-fixing-commits database
       and compress it.

    Returns:
        A list of ``{"rev": ..., "type": ...}`` dicts, covering previously
        stored and newly classified commits, restricted to types ``"r"``
        and ``"d"``.

    Raises:
        AssertionError: if no commit in the time window is left to classify.
    """
    # The same refresh policy applies to all three databases.
    for message, db_path in (
        ("Downloading commits database...", repository.COMMITS_DB),
        ("Downloading bugs database...", bugzilla.BUGS_DB),
        ("Download previous classifications...", BUG_FIXING_COMMITS_DB),
    ):
        logger.info(message)
        if db.is_old_version(db_path) or not db.exists(db_path):
            db.download(db_path, force=True)

    logger.info("Get previously classified commits...")
    prev_bug_fixing_commits = list(db.read(BUG_FIXING_COMMITS_DB))
    prev_bug_fixing_commits_nodes = {
        bug_fixing_commit["rev"] for bug_fixing_commit in prev_bug_fixing_commits
    }
    logger.info(f"Already classified {len(prev_bug_fixing_commits)} commits...")

    # TODO: Switch to the pure Defect model, as it's better in this case.
    logger.info("Downloading defect/enhancement/task model...")
    download_model("defectenhancementtask")
    defect_model = DefectEnhancementTaskModel.load("defectenhancementtaskmodel")

    logger.info("Downloading regression model...")
    download_model("regression")
    regression_model = RegressionModel.load("regressionmodel")

    # Sample the clock once so both ends of the window share a reference instant.
    now = datetime.now()
    start_date = now - RELATIVE_START_DATE
    end_date = now - RELATIVE_END_DATE

    logger.info(
        f"Gathering bug IDs associated to commits (since {start_date} and up to {end_date})..."
    )
    commit_map = defaultdict(list)
    for commit in repository.get_commits():
        if commit["node"] in prev_bug_fixing_commits_nodes:
            continue

        commit_date = dateutil.parser.parse(commit["pushdate"])
        if commit_date < start_date or commit_date > end_date:
            continue

        commit_map[commit["bug_id"]].append(commit["node"])

    commit_count = sum(len(commit_list) for commit_list in commit_map.values())
    logger.info(
        f"{commit_count} commits found, {len(commit_map)} bugs linked to commits"
    )
    assert len(commit_map) > 0

    def get_relevant_bugs():
        # Only bugs linked to at least one commit collected above.
        return (bug for bug in bugzilla.get_bugs() if bug["id"] in commit_map)

    # First pass only counts, so the progress bar below has a total.
    bug_count = sum(1 for _ in get_relevant_bugs())
    logger.info(
        f"{bug_count} bugs in total, {len(commit_map) - bug_count} bugs linked to commits missing"
    )

    # NOTE(review): the lookups below treat get_labels() as returning a
    # mapping of bug ID -> label. A test elsewhere unpacks
    # RegressionModel.get_labels() as a 2-tuple; if that is the current API,
    # these membership tests are silently always false and every bug falls
    # through to the models. Confirm against the model classes.
    known_defect_labels = defect_model.get_labels()
    known_regression_labels = regression_model.get_labels()

    def classify_bug(bug):
        """Return "r", "d" or "e" for *bug*, preferring known labels over the models."""
        bug_id = bug["id"]

        # If we know the label already, we don't need to apply the model.
        if bug_id in known_regression_labels and known_regression_labels[bug_id] == 1:
            return "r"

        if bug_id in known_defect_labels:
            return "d" if known_defect_labels[bug_id] == "defect" else "e"

        if defect_model.classify(bug)[0] == "defect":
            return "r" if regression_model.classify(bug)[0] == 1 else "d"

        return "e"

    bug_fixing_commits = []
    # get_relevant_bugs() already restricts iteration to bugs in commit_map,
    # so no further membership check is needed inside the loop.
    for bug in tqdm(get_relevant_bugs(), total=bug_count):
        type_ = classify_bug(bug)
        bug_fixing_commits.extend(
            {"rev": node, "type": type_} for node in commit_map[bug["id"]]
        )

    db.append(BUG_FIXING_COMMITS_DB, bug_fixing_commits)
    zstd_compress(BUG_FIXING_COMMITS_DB)

    return [
        bug_fixing_commit
        for bug_fixing_commit in prev_bug_fixing_commits + bug_fixing_commits
        if bug_fixing_commit["type"] in ("r", "d")
    ]
def train_regression(self):
    """Train a fresh ``RegressionModel`` and compress ``regressionmodel``.

    ``regressionmodel`` is presumably the artifact written by training;
    confirm against ``RegressionModel.train``.
    """
    logger.info("Training *regression vs non-regression* model")

    RegressionModel().train()

    self.compress_file("regressionmodel")
parser.add_argument( "--goal", help="Goal of the labeler", choices=["str", "regressionrange"], default="str", ) args = parser.parse_args() if args.goal == "str": from bugbug.models.bug import BugModel model = BugModel.load("bugmodel") elif args.goal == "regressionrange": from bugbug.models.regression import RegressionModel model = RegressionModel.load("regressionmodel") file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv") with open(file_path, "r") as f: reader = csv.reader(f) next(reader) labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader] already_done = set((c[0], c[1]) for c in labeled_comments) bugs = [] for bug in bugzilla.get_bugs(): # For the str and regressionrange problems, we don't care about test failures, if ("intermittent-failure" in bug["keywords"] or "stockwell" in bug["whiteboard"]
parser.add_argument( "--goal", help="Goal of the labeler", choices=["str", "regressionrange"], default="str", ) args = parser.parse_args() if args.goal == "str": from bugbug.models.bug import BugModel model = BugModel.load("bugmodel") elif args.goal == "regressionrange": from bugbug.models.regression import RegressionModel model = RegressionModel.load("regressionmodel") file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv") with open(file_path, "r") as f: reader = csv.reader(f) next(reader) labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader] already_done = set((c[0], c[1]) for c in labeled_comments) bugs = [] for bug in bugzilla.get_bugs(): # For the str and regressionrange problems, we don't care about test failures, if ( "intermittent-failure" in bug["keywords"]
def test_get_regression_labels():
    """Check the regression labels of two known bugs.

    ``get_labels()`` is unpacked as a pair; only the first element, indexed
    by bug ID, is inspected.
    """
    labels, _ = RegressionModel().get_labels()

    expected_labels = (
        (447581, 0),
        (518272, 1),
    )
    for bug_id, expected in expected_labels:
        assert labels[bug_id] == expected
def __init__(self):
    """Set up the parent class, load the regression model, reset autofix state.

    The model is loaded from whatever ``self.retrieve_model()`` returns, and
    ``self.autofix_regression`` starts out as an empty list.
    """
    super().__init__()

    model_location = self.retrieve_model()
    self.model = RegressionModel.load(model_location)

    self.autofix_regression = []