def get_commits_to_ignore(self) -> None:
    """Build and upload the DB of commits to exclude from regressor mining.

    A commit is ignored when it is explicitly marked ignored, was backed out,
    has no associated bug, is itself a backout, is a wptsync import, or was
    manually annotated with the "annotateignore" label. The revisions a
    backout backs out are also ignored (with type "backedout").

    Side effects: downloads COMMITS_DB, writes/compresses/uploads
    IGNORED_COMMITS_DB. Returns None.
    """
    assert db.download(repository.COMMITS_DB)

    # 12-char node prefixes already marked as ignored (used to dedupe
    # backed-out revisions, which may be referenced by short hash).
    ignored = set()
    commits_to_ignore = []
    # 12-char prefixes of every commit actually present in the repository,
    # used below to drop backout targets that predate our history.
    all_commits = set()

    # Nodes a human explicitly flagged to be ignored via the labels file.
    annotate_ignore_nodes = {
        node
        for node, label in labels.get_labels("annotateignore")
        if label == "1"
    }

    for commit in repository.get_commits(
        include_no_bug=True, include_backouts=True, include_ignored=True
    ):
        all_commits.add(commit["node"][:12])

        if (
            commit["ignored"]
            or commit["backedoutby"]
            or not commit["bug_id"]
            or len(commit["backsout"]) > 0
            or repository.is_wptsync(commit)
            or commit["node"] in annotate_ignore_nodes
        ):
            commits_to_ignore.append(
                {
                    "rev": commit["node"],
                    # Only backed-out commits get a "type"; the rest are "".
                    "type": "backedout" if commit["backedoutby"] else "",
                }
            )
            ignored.add(commit["node"][:12])

        # Also ignore every revision this commit backs out (skipping ones
        # we already recorded).
        if len(commit["backsout"]) > 0:
            for backedout in commit["backsout"]:
                if backedout[:12] in ignored:
                    continue
                ignored.add(backedout[:12])

                commits_to_ignore.append({"rev": backedout, "type": "backedout"})

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")

    # Skip backed-out commits which aren't in the repository (commits which landed *before* the Mercurial history
    # started, and backouts which mentioned a bad hash in their message).
    commits_to_ignore = [
        c for c in commits_to_ignore if c["rev"][:12] in all_commits
    ]

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")

    logger.info(
        "...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
        )
    )

    db.write(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)
    db.upload(IGNORED_COMMITS_DB)
def get_labels(self):
    """Assign a regression label to each eligible commit.

    A commit gets label 1 when the regressor finder marked it as
    bug-introducing or its bug is listed in some bug's "regressed_by";
    otherwise it gets label 0, but only when it falls inside the window
    where clean labels are trustworthy (older than 6 months, newer than
    2.5 years). Backed-out, wptsync, and evaluation-window commits are
    skipped entirely.

    Returns a (node -> label) dict and the list of possible labels.
    """
    classes = {}

    # Revisions flagged as bug-introducing by the regressor finder script.
    regressors = {
        r["bug_introducing_rev"]
        for r in db.read(BUG_INTRODUCING_COMMITS_DB)
        if r["bug_introducing_rev"]
    }

    # Bugs referenced in any bug's "regressed_by" field.
    regressor_bugs = set()
    for bug in bugzilla.get_bugs():
        regressor_bugs.update(bug["regressed_by"])

    for commit in repository.get_commits():
        if commit["backedoutby"] or repository.is_wptsync(commit):
            continue

        push_date = dateutil.parser.parse(commit["pushdate"])

        # Skip commits used for the evaluation phase.
        if push_date > datetime.utcnow() - relativedelta(months=EVALUATION_MONTHS):
            continue

        node = commit["node"]
        if node in regressors or commit["bug_id"] in regressor_bugs:
            classes[node] = 1
            continue

        # The labels we have are only from two years and six months ago (see the regressor finder script).
        if push_date < datetime.utcnow() - relativedelta(years=2, months=6):
            continue

        # We remove the last 6 months, as there could be regressions which haven't been filed yet.
        if push_date > datetime.utcnow() - relativedelta(months=6):
            continue

        classes[node] = 0

    label_values = list(classes.values())
    print("{} commits caused regressions".format(label_values.count(1)))
    print("{} commits did not cause regressions".format(label_values.count(0)))

    return classes, [0, 1]
def generate_all_data() -> Generator[Dict[str, Any], None, None]:
    """Yield per-push training data for the test scheduling model.

    Iterates over push data, merges each push's commits, and yields a dict
    with the push's revisions and the generated feature rows (only for
    pushes after HISTORY_DATE_START). Relies on closure variables:
    ``granularity``, ``all_runnables``, ``push_data_iter``,
    ``push_data_count``.

    Side effects: updates the ``past_failures`` shelve DB (including
    ``push_num`` and ``all_runnables``) and, for group granularities, feeds
    the touched-together generator.

    BUG FIX: the final ``send(None)`` flush of the touched-together
    generator previously ran only for "group", although the generator is
    created for both "group" and "config_group"; it now matches the
    creation condition.
    """
    past_failures = test_scheduling.get_past_failures(granularity, False)

    # Resume the push counter from a previous run, if any.
    push_num = past_failures["push_num"] if "push_num" in past_failures else 0

    commit_map = {}
    for commit_data in tqdm(repository.get_commits()):
        commit_map[commit_data["node"]] = commit_data

    # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
    past_failures["all_runnables"] = all_runnables
    # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
    # same as the current ones?

    saved_nodes = set()
    skipped_no_commits = 0
    skipped_too_big_commits = 0
    skipped_no_runnables = 0

    if granularity in ("group", "config_group"):
        update_touched_together_gen = test_scheduling.update_touched_together()
        next(update_touched_together_gen)

    for (
        i,
        (
            revisions,
            fix_revision,
            push_runnables,
            possible_regressions,
            likely_regressions,
        ),
    ) in enumerate(tqdm(push_data_iter(), total=push_data_count)):
        push_num += 1

        # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
        commits = tuple(
            commit_map.pop(revision)
            for revision in revisions
            if revision in commit_map
        )
        if len(commits) == 0:
            skipped_no_commits += 1
            continue

        # Skip wptsync commits, since they are not like normal pushes made by developers.
        if any(repository.is_wptsync(commit) for commit in commits):
            continue

        merged_commits = commit_features.merge_commits(commits)

        # XXX: For now, skip commits which are too large.
        # In the future we can either:
        #  - Improve shelve perf and go back to consider all files;
        #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
        #  - Keep a limit of number of files.
        if len(merged_commits["files"]) > 50:
            skipped_too_big_commits += 1
            continue

        # If we considered all_runnables, we'd generate a huge amount of data.
        # We consider only the runnables which run in this push, and the possible and likely regressions
        # from this push. We can't consider all runnables because we can't be sure that a task that didn't
        # run on a push would have been successful.
        runnables_to_consider = list(
            set(push_runnables + possible_regressions + likely_regressions)
        )

        if len(runnables_to_consider) == 0:
            skipped_no_runnables += 1
            continue

        # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
        if i % 250 == 0:
            past_failures.sync()

        pushdate = dateutil.parser.parse(merged_commits["pushdate"])

        if granularity in ("group", "config_group"):
            update_touched_together_gen.send(commits[0]["node"])

        result_data = []
        for data in test_scheduling.generate_data(
            granularity,
            past_failures,
            merged_commits,
            push_num,
            runnables_to_consider,
            possible_regressions,
            likely_regressions,
        ):
            if pushdate > HISTORY_DATE_START:
                result_data.append(data)

        if pushdate > HISTORY_DATE_START:
            saved_nodes.add(i)
            yield {
                "revs": revisions,
                "data": result_data,
            }

    # Flush the touched-together generator for every granularity that
    # created it (previously only "group" was flushed, so "config_group"
    # runs never committed the final touched-together updates).
    if granularity in ("group", "config_group"):
        try:
            update_touched_together_gen.send(None)
        except StopIteration:
            pass

    logger.info(f"saved push data nodes: {len(saved_nodes)}")
    logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
    logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
    logger.info(f"skipped {skipped_no_runnables} (no interesting runnables)")

    past_failures["push_num"] = push_num
    past_failures.close()
def overwrite_classes(self, commits, classes, probabilities):
    """Force wptsync commits to be classified as non-regressors.

    wptsync commits are automated imports, so their prediction is
    overridden: label 0, or probability [1.0, 0.0] when ``probabilities``
    is set. Mutates ``classes`` in place and returns it.
    """
    for idx, commit in enumerate(commits):
        if not repository.is_wptsync(commit):
            continue
        if probabilities:
            classes[idx] = [1.0, 0.0]
        else:
            classes[idx] = 0

    return classes
def evaluation(self) -> None:
    """Evaluate the model on recent commits and derive risk bands.

    Classifies the last EVALUATION_MONTHS of commits (excluded from
    training), grouped by bug, and prints risk statistics plus the
    probability thresholds delimiting three risk bands (half-average,
    average, double-average regression rate).

    BUG FIXES vs the previous version:
    - three printed f-strings were missing the closing ")" after the
      regression rate;
    - the band-2 filter used the chained comparison
      ``(avg/2)+0.01 > rate > (avg*2)-0.01``, which is unsatisfiable
      (the lower bound exceeds the upper bound), so every candidate range
      was printed; it now keeps only ranges whose rate is *between* the
      band-1 and band-3 thresholds, as the comments describe.
    """
    bug_regressors = set(
        sum((bug["regressed_by"] for bug in bugzilla.get_bugs()), [])
    )

    commits = []

    for commit_data in repository.get_commits():
        if commit_data["backedoutby"]:
            continue

        if repository.is_wptsync(commit_data):
            continue

        push_date = dateutil.parser.parse(commit_data["pushdate"])

        # Use the past two months of data (make sure it is not also used for training!).
        if push_date < datetime.utcnow() - relativedelta(months=EVALUATION_MONTHS):
            continue

        commits.append(commit_data)

    print(f"{len(commits)} commits in the evaluation set")
    bugs_num = len(set(commit["bug_id"] for commit in commits))
    print(f"{bugs_num} bugs in the evaluation set")

    # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
    commits.sort(key=lambda x: x["bug_id"])

    results = []
    for bug_id, commit_iter in itertools.groupby(commits, lambda x: x["bug_id"]):
        # A bug is "risky" as its riskiest commit.
        probs = self.classify(list(commit_iter), probabilities=True)
        results.append((max(probs[:, 1]), bug_id in bug_regressors))

    # Let's define the risk bands relatively to average risk.
    # On average, around 1 out of 10 (8%) patches cause regressions.
    # Risk band 1 - around 1 out of 20 (4%) patches within this risk band cause regressions.
    # Risk band 2 - around 1 out of 10 (8%) patches within this risk band cause regressions.
    # Risk band 3 - around 1 out of 5 (16%) patches within this risk band cause regressions.

    # Step 1. Calculate % of patches which cause regressions.
    total_landings = len(results)
    total_regressions = sum(1 for _, is_reg in results if is_reg)
    average_regression_rate = total_regressions / total_landings

    print(f"Average risk is {average_regression_rate}")

    # Minimum number of landings required before a rate estimate is trusted.
    MIN_SAMPLE = 200

    # Step 2. Define risk band 1 (half than average risk).
    max_band1_prob = 1.0
    total_landings = 0
    total_regressions = 0
    results.sort(key=lambda x: x[0])
    for prob, is_reg in results:
        total_landings += 1
        if is_reg:
            total_regressions += 1

        if total_landings < MIN_SAMPLE:
            continue

        print(
            f"{total_regressions} out of {total_landings} patches with risk lower than {prob} caused regressions ({total_regressions / total_landings})"
        )

        # No need to go further, since we are interested in half than average risk.
        if (
            total_regressions / total_landings
            >= (average_regression_rate / 2) + 0.01
        ):
            max_band1_prob = prob
            break

    print("\n\n")

    # Step 3. Define risk band 3 (double than average risk).
    min_band3_prob = 0.0
    total_landings = 0
    total_regressions = 0
    results.sort(key=lambda x: x[0], reverse=True)
    for prob, is_reg in results:
        total_landings += 1
        if is_reg:
            total_regressions += 1

        if total_landings < MIN_SAMPLE:
            continue

        print(
            f"{total_regressions} out of {total_landings} patches with risk higher than {prob} caused regressions ({total_regressions / total_landings})"
        )

        # No need to go further, since we are interested in double than average risk.
        if (
            total_regressions / total_landings
            <= (average_regression_rate * 2) - 0.01
        ):
            min_band3_prob = prob
            break

    print("\n\n")

    # Step 4. Define risk band 2 (average risk).
    results.sort(key=lambda x: x[0])
    for prob_start in np.arange(max_band1_prob / 2, max_band1_prob + 0.02, 0.01):
        for prob_end in np.arange(min_band3_prob - 0.02, 0.99, 0.01):
            total_landings = 0
            total_regressions = 0

            for prob, is_reg in results:
                if prob < prob_start or prob > prob_end:
                    continue
                total_landings += 1
                if is_reg:
                    total_regressions += 1

            if total_landings < MIN_SAMPLE:
                continue

            # Keep only ranges whose regression rate is "around average",
            # i.e. between the band-1 and band-3 thresholds.
            band2_rate = total_regressions / total_landings
            if not (
                (average_regression_rate / 2) + 0.01
                <= band2_rate
                <= (average_regression_rate * 2) - 0.01
            ):
                continue

            print(
                f"{total_regressions} out of {total_landings} patches with risk between {prob_start} and {prob_end} caused regressions ({total_regressions / total_landings})"
            )
def get_labels(self):
    """Label commits as regressors (1) or clean (0) using "Regressed By"
    data and, optionally, the regressor finder results.

    A commit is labelled 1 when its bug appears in some bug's
    "regressed_by", or (when ``self.use_finder``) when the finder marked
    it as bug-introducing. Commits the finder flagged are excluded from
    the clean set when ``self.exclude_finder``. Clean labels are limited
    to the window between 2 years and 3 months ago.

    ROBUSTNESS FIX: ``regression_fixes`` and ``regressors`` are now always
    initialized; previously they were only bound inside conditional
    branches and later membership tests avoided a NameError purely through
    short-circuit evaluation.

    Returns a (node -> label) dict and the list of possible labels.
    """
    classes = {}

    # Finder data, empty when the finder isn't used at all.
    regression_fixes = set()
    regressors = set()

    if self.use_finder or self.exclude_finder:
        if self.finder_regressions_only:
            # Fixing commits the finder classified as regression fixes.
            regression_fixes = set(
                bug_fixing_commit["rev"]
                for bug_fixing_commit in db.read(BUG_FIXING_COMMITS_DB)
                if bug_fixing_commit["type"] == "r"
            )

        regressors = set(
            r["bug_introducing_rev"]
            for r in db.read(BUG_INTRODUCING_COMMITS_DB)
            if r["bug_introducing_rev"]
            and (
                not self.finder_regressions_only
                or r["bug_fixing_rev"] in regression_fixes
            )
        )

    # Bugs referenced in any bug's "regressed_by" field.
    regressor_bugs = set(
        sum((bug["regressed_by"] for bug in bugzilla.get_bugs()), [])
    )

    for commit_data in repository.get_commits():
        if commit_data["backedoutby"]:
            continue

        if repository.is_wptsync(commit_data):
            continue

        push_date = dateutil.parser.parse(commit_data["pushdate"])

        # Skip commits used for the evaluation phase.
        if push_date > datetime.utcnow() - relativedelta(months=EVALUATION_MONTHS):
            continue

        node = commit_data["node"]
        if commit_data["bug_id"] in regressor_bugs or (
            self.use_finder and node in regressors
        ):
            classes[node] = 1
        elif not self.exclude_finder or node not in regressors:
            # The labels we have are only from two years ago (see https://groups.google.com/g/mozilla.dev.platform/c/SjjW6_O-FqM/m/G-CrIVT2BAAJ).
            # While we can go further back with the regressor finder script, it isn't remotely
            # as precise as the "Regressed By" data.
            # In the future, we might want to re-evaluate this limit (e.g. extend it), but we
            # have to be careful (using too old patches might cause worse results as patch
            # characteristics evolve over time).
            if push_date < datetime.utcnow() - relativedelta(years=2):
                continue

            # We remove the last 3 months, as there could be regressions which haven't been
            # filed yet. While it is true that some regressions might not be found for a long
            # time, more than 3 months seems overly conservative.
            # There will be some patches we currently add to the clean set and will later move
            # to the regressor set, but they are a very small subset.
            if push_date > datetime.utcnow() - relativedelta(months=3):
                continue

            classes[node] = 0

    print(
        "{} commits caused regressions".format(
            sum(1 for label in classes.values() if label == 1)
        )
    )
    print(
        "{} commits did not cause regressions".format(
            sum(1 for label in classes.values() if label == 0)
        )
    )

    return classes, [0, 1]