def classify_bug(model_name, bug_ids, bugzilla_token):
    """Classify the given bugs with the named model and store results via setkey.

    Args:
        model_name: Name of the model to run.
        bug_ids: Iterable of bug ids (ints or int-like strings).
        bugzilla_token: API token used to fetch the bugs from Bugzilla.

    Returns:
        "OK" on success, "NOK" when no bugs could be retrieved or the model
        is missing.
    """
    from bugbug_http.app import JobInfo

    # This should be called in a process worker so it should be safe to set
    # the token here
    bug_ids_set = set(map(int, bug_ids))
    bugzilla.set_token(bugzilla_token)

    bugs = bugzilla.get(bug_ids)

    # Mark bugs we could not retrieve as unavailable so callers don't block
    # waiting for a result that will never arrive.
    missing_bugs = bug_ids_set.difference(bugs.keys())
    for bug_id in missing_bugs:
        job = JobInfo(classify_bug, model_name, bug_id)

        # TODO: Find a better error format
        encoded_data = json.dumps({"available": False})
        setkey(job.result_key, encoded_data)

    if not bugs:
        return "NOK"

    model = get_model(model_name)

    if not model:
        # Lazy %-style args: the message is only formatted when INFO is enabled.
        LOGGER.info("Missing model %r, aborting", model_name)
        return "NOK"

    model_extra_data = model.get_extra_data()

    # TODO: Classify could choke on a single bug which could make the whole
    # job fail. What should we do here?
    probs = model.classify(list(bugs.values()), True)
    indexes = probs.argmax(axis=-1)
    suggestions = model.le.inverse_transform(indexes)

    probs_list = probs.tolist()
    indexes_list = indexes.tolist()
    suggestions_list = suggestions.tolist()

    for i, bug_id in enumerate(bugs.keys()):
        data = {
            "prob": probs_list[i],
            "index": indexes_list[i],
            "class": suggestions_list[i],
            "extra_data": model_extra_data,
        }

        encoded_data = json.dumps(data)

        job = JobInfo(classify_bug, model_name, bug_id)
        setkey(job.result_key, encoded_data)

        # Save the bug last change
        setkey(job.change_time_key, bugs[bug_id]["last_change_time"], expiration=0)

    return "OK"
def classify_issue(
    model_name: str, owner: str, repo: str, issue_nums: Sequence[int]
) -> str:
    """Classify the given GitHub issues with the named model and store results.

    Args:
        model_name: Name of the model to run.
        owner: GitHub repository owner.
        repo: GitHub repository name.
        issue_nums: Issue numbers to classify.

    Returns:
        "OK" on success, "NOK" when no issues could be fetched or the model
        is missing.
    """
    from bugbug_http.app import JobInfo

    github = Github(owner=owner, repo=repo)

    issue_ids_set = set(map(int, issue_nums))

    issues = {
        issue_num: github.fetch_issue_by_number(owner, repo, issue_num, True)
        for issue_num in issue_nums
    }

    # Mark issues we could not fetch as unavailable so callers don't block
    # waiting for a result that will never arrive.
    missing_issues = issue_ids_set.difference(issues.keys())
    for issue_id in missing_issues:
        job = JobInfo(classify_issue, model_name, owner, repo, issue_id)

        # TODO: Find a better error format
        setkey(job.result_key, orjson.dumps({"available": False}))

    if not issues:
        return "NOK"

    model = MODEL_CACHE.get(model_name)

    if not model:
        # Lazy %-style args: the message is only formatted when INFO is enabled.
        LOGGER.info("Missing model %r, aborting", model_name)
        return "NOK"

    model_extra_data = model.get_extra_data()

    # TODO: Classify could choke on a single issue which could make the whole
    # job fail. What should we do here?
    probs = model.classify(list(issues.values()), True)
    indexes = probs.argmax(axis=-1)
    suggestions = model.le.inverse_transform(indexes)

    probs_list = probs.tolist()
    indexes_list = indexes.tolist()
    suggestions_list = suggestions.tolist()

    for i, issue_id in enumerate(issues.keys()):
        data = {
            "prob": probs_list[i],
            "index": indexes_list[i],
            "class": suggestions_list[i],
            "extra_data": model_extra_data,
        }

        job = JobInfo(classify_issue, model_name, owner, repo, issue_id)
        setkey(job.result_key, orjson.dumps(data), compress=True)

        # Save the issue's last update time so staleness can be detected later.
        setkey(job.change_time_key, issues[issue_id]["updated_at"].encode())

    return "OK"
def classify_bug(model_name: str, bug_ids: Sequence[int], bugzilla_token: str) -> str:
    """Classify the given bugs with the named model and store results via setkey.

    Args:
        model_name: Name of the model to run.
        bug_ids: Bug ids to classify.
        bugzilla_token: API token used to fetch the bugs from Bugzilla.

    Returns:
        "OK" on success, "NOK" when no bugs could be retrieved or the model
        is missing.
    """
    from bugbug_http.app import JobInfo

    # This should be called in a process worker so it should be safe to set
    # the token here
    bug_ids_set = set(map(int, bug_ids))
    bugzilla.set_token(bugzilla_token)

    # Fetch bugs in chunks to respect Bugzilla request size limits.
    bugs = {}
    for i in range(0, len(bug_ids), Bugzilla.BUGZILLA_CHUNK_SIZE):
        bugs.update(bugzilla.get(bug_ids[i:(i + Bugzilla.BUGZILLA_CHUNK_SIZE)]))

    # Mark bugs we could not retrieve as unavailable so callers don't block
    # waiting for a result that will never arrive.
    missing_bugs = bug_ids_set.difference(bugs.keys())
    for bug_id in missing_bugs:
        job = JobInfo(classify_bug, model_name, bug_id)

        # TODO: Find a better error format
        setkey(job.result_key, orjson.dumps({"available": False}))

    if not bugs:
        return "NOK"

    model = MODEL_CACHE.get(model_name)

    if not model:
        # Lazy %-style args: the message is only formatted when INFO is enabled.
        LOGGER.info("Missing model %r, aborting", model_name)
        return "NOK"

    model_extra_data = model.get_extra_data()

    # TODO: Classify could choke on a single bug which could make the whole
    # job fail. What should we do here?
    probs = model.classify(list(bugs.values()), True)
    indexes = probs.argmax(axis=-1)
    suggestions = model.le.inverse_transform(indexes)

    probs_list = probs.tolist()
    indexes_list = indexes.tolist()
    suggestions_list = suggestions.tolist()

    for i, bug_id in enumerate(bugs.keys()):
        data = {
            "prob": probs_list[i],
            "index": indexes_list[i],
            "class": suggestions_list[i],
            "extra_data": model_extra_data,
        }

        job = JobInfo(classify_bug, model_name, bug_id)
        setkey(job.result_key, orjson.dumps(data), compress=True)

        # Save the bug last change
        setkey(job.change_time_key, bugs[bug_id]["last_change_time"].encode())

    return "OK"
def schedule_tests(branch, rev):
    """Store (placeholder) test scheduling data for the given push.

    Args:
        branch: hg.mozilla.org branch path (e.g. "integration/autoland").
        rev: Revision hash of the push head.

    Returns:
        "OK" on success, "NOK" when the push is not found.
    """
    from bugbug_http.app import JobInfo

    job = JobInfo(schedule_tests, branch, rev)
    # BUG FIX: the original logged the literal string "{job}" because the
    # f prefix was missing.
    LOGGER.debug(f"Processing {job}")

    url = f"https://hg.mozilla.org/{branch}/json-automationrelevance/{rev}"
    r = requests.get(url)
    if r.status_code == 404:
        LOGGER.warning(f"Push not found at {url}!")
        return "NOK"

    # Express the whole stack as a revset when the push has more than one
    # changeset.
    first_rev = r.json()["changesets"][0]["node"]
    if first_rev != rev:
        revset = f"{first_rev}::{rev}"
    else:
        revset = rev

    # TODO Return real data based on 'revset'
    def get_data(revset):
        return {
            "tasks": ["test-macosx1014-64/debug-gtest-1proc"],
            "groups": [
                "caps/test/unit/xpcshell.ini",
                "dom/indexedDB/test/mochitest.ini",
            ],
        }

    data = get_data(revset)
    encoded_data = json.dumps(data)
    setkey(job.result_key, encoded_data)

    return "OK"
def get_config_specific_groups(config: str) -> str:
    """Store the list of test groups that are specific to the given config.

    A group is considered config-specific when one of its equivalence sets
    contains exactly that config and nothing else.

    Args:
        config: The configuration name to match.

    Returns:
        "OK" once the result has been stored.
    """
    from bugbug_http.app import JobInfo

    job = JobInfo(get_config_specific_groups, config)
    LOGGER.info(f"Processing {job}...")

    testgroupselect_model = MODEL_CACHE.get("testgroupselect")
    equivalence_sets = testgroupselect_model._get_equivalence_sets(0.9)

    past_failures_data = test_scheduling.get_past_failures("group", True)
    all_runnables = past_failures_data["all_runnables"]

    def is_config_specific(group):
        # True when some equivalence set for this group is exactly {config}.
        return any(eq_set == {config} for eq_set in equivalence_sets[group])

    specific_groups = [
        {"name": group} for group in all_runnables if is_config_specific(group)
    ]

    setkey(job.result_key, orjson.dumps(specific_groups), compress=True)

    return "OK"
def schedule_tests(branch, rev):
    """Select tasks and test groups relevant to the push and store them.

    Args:
        branch: Repository branch (e.g. "integration/autoland").
        rev: Revision hash of the push head.

    Returns:
        "OK" on success, "NOK" when the push is not found or the stack
        cannot be applied locally.
    """
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug(f"Processing {job}")

    # Load the full stack of patches leading to that revision
    try:
        stack = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    # Apply the stack on the local repository
    try:
        revs = repository.apply_stack(REPO_DIR, stack, branch)
    except Exception as e:
        LOGGER.warning(f"Failed to apply stack {branch} @ {rev}: {e}")
        return "NOK"

    test_selection_threshold = float(
        os.environ.get("TEST_SELECTION_CONFIDENCE_THRESHOLD", 0.3))

    # Analyze patches.
    commits = repository.download_commits(REPO_DIR, revs=revs, save=False,
                                          use_single_process=True)

    # Hoist the model lookups: the original fetched "testlabelselect" from
    # the cache twice for the same request.
    testlabelselect_model = MODEL_CACHE.get("testlabelselect")
    testgroupselect_model = MODEL_CACHE.get("testgroupselect")

    tasks = testlabelselect_model.select_tests(commits, test_selection_threshold)

    # Reduce the high-confidence tasks (>= 0.7) to a minimal equivalent set.
    # Set comprehension instead of set(generator) (flake8-comprehensions C401).
    reduced = testlabelselect_model.reduce(
        {t for t, c in tasks.items() if c >= 0.7}, 1.0)

    data = {
        "tasks": tasks,
        "groups": testgroupselect_model.select_tests(
            commits, test_selection_threshold),
        "reduced_tasks": {t: c for t, c in tasks.items() if t in reduced},
    }
    setkey(job.result_key, orjson.dumps(data))

    return "OK"
def schedule_tests(branch, rev):
    """Apply the push's patch stack locally and store placeholder scheduling data.

    Args:
        branch: Repository branch (e.g. "integration/autoland").
        rev: Revision hash of the push head.

    Returns:
        "OK" on success, "NOK" when the push is not found or the stack
        cannot be applied locally.
    """
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug(f"Processing {job}")

    # Load the full stack of patches leading to that revision
    try:
        stack = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    # Apply the stack on the local repository
    # Autoland should always rebase on top of parents, never on tip
    if branch == "integration/autoland":
        default_base = None
    else:
        default_base = "tip"

    try:
        apply_stack(REPO_DIR, stack, branch, default_base)
    except Exception as e:
        LOGGER.warning(f"Failed to apply stack {branch} @ {rev}: {e}")
        return "NOK"

    # Express the whole stack as a revset when the push has more than one
    # changeset.
    first_rev = stack[0]["node"]
    revset = rev if first_rev == rev else f"{first_rev}::{rev}"

    # TODO Return real data based on 'revset'
    def get_data(revset):
        return {
            "tasks": ["test-macosx1014-64/debug-gtest-1proc"],
            "groups": [
                "caps/test/unit/xpcshell.ini",
                "dom/indexedDB/test/mochitest.ini",
            ],
        }

    encoded_data = json.dumps(get_data(revset))
    setkey(job.result_key, encoded_data)

    return "OK"
def schedule_tests(branch: str, rev: str) -> str:
    """Select tasks, groups and config/group pairs for the push and store them.

    Args:
        branch: Repository branch (e.g. "integration/autoland").
        rev: Revision hash of the push head.

    Returns:
        "OK" on success, "NOK" when the push cannot be found.
    """
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.info(f"Processing {job}...")

    # Pull the revision to the local repository
    LOGGER.info("Pulling commits from the remote repository...")
    repository.pull(REPO_DIR, branch, rev)

    # Load the full stack of patches leading to that revision
    LOGGER.info("Loading commits to analyze using automationrelevance...")
    try:
        revs = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    test_selection_threshold = float(
        os.environ.get("TEST_SELECTION_CONFIDENCE_THRESHOLD", 0.5)
    )

    # Analyze patches.
    commits = repository.download_commits(
        REPO_DIR, revs=revs, save=False, use_single_process=True, include_no_bug=True
    )

    if len(commits) > 0:
        testlabelselect_model = MODEL_CACHE.get("testlabelselect")
        testgroupselect_model = MODEL_CACHE.get("testgroupselect")

        tasks = testlabelselect_model.select_tests(commits, test_selection_threshold)

        # Set comprehensions instead of set(generator) (flake8-comprehensions
        # C401).
        reduced = testlabelselect_model.reduce(
            {t for t, c in tasks.items() if c >= 0.8}, 1.0
        )
        reduced_higher = testlabelselect_model.reduce(
            {t for t, c in tasks.items() if c >= 0.9}, 1.0
        )

        groups = testgroupselect_model.select_tests(commits, test_selection_threshold)
        config_groups = testgroupselect_model.select_configs(groups.keys(), 1.0)
    else:
        tasks = {}
        reduced = {}
        # BUG FIX: reduced_higher was left undefined on this path; it only
        # avoided a NameError because the comprehension below never evaluates
        # its condition when `tasks` is empty.
        reduced_higher = {}
        groups = {}
        config_groups = {}

    data = {
        "tasks": tasks,
        "groups": groups,
        "config_groups": config_groups,
        "reduced_tasks": {t: c for t, c in tasks.items() if t in reduced},
        "reduced_tasks_higher": {t: c for t, c in tasks.items() if t in reduced_higher},
        "known_tasks": get_known_tasks(),
    }
    setkey(job.result_key, orjson.dumps(data), compress=True)

    return "OK"
def schedule_tests(branch, rev):
    """Select relevant tasks and test groups for the push and store them.

    Args:
        branch: Repository branch (e.g. "integration/autoland").
        rev: Revision hash of the push head.

    Returns:
        "OK" on success, "NOK" when the push is not found or the stack
        cannot be applied locally.
    """
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug(f"Processing {job}")

    # Load the full stack of patches leading to that revision
    try:
        stack = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    # Apply the stack on the local repository
    try:
        revs = repository.apply_stack(REPO_DIR, stack, branch)
    except Exception as e:
        LOGGER.warning(f"Failed to apply stack {branch} @ {rev}: {e}")
        return "NOK"

    test_selection_threshold = float(
        os.environ.get("TEST_SELECTION_CONFIDENCE_THRESHOLD", 0.3))

    # Analyze patches.
    commits = repository.download_commits(REPO_DIR, revs=revs, save=False,
                                          use_single_process=True)
    commit_data = commit_features.merge_commits(commits)

    def get_runnables(granularity):
        # Build one classification sample per known runnable at this
        # granularity, then keep those whose predicted failure probability
        # exceeds the threshold.
        past_failures_data = test_scheduling.get_past_failures(granularity)

        push_num = past_failures_data["push_num"]
        all_runnables = past_failures_data["all_runnables"]

        samples = []
        for runnable_data in test_scheduling.generate_data(
                past_failures_data, commit_data, push_num, all_runnables, [],
                []):
            # At label granularity, only actual test tasks are of interest.
            if granularity == "label" and not runnable_data["name"].startswith(
                    "test-"):
                continue

            sample = commit_data.copy()
            sample["test_job"] = runnable_data
            samples.append(sample)

        probs = MODEL_CACHE.get(f"test{granularity}select").classify(
            samples, probabilities=True)

        chosen = np.argwhere(
            probs[:, 1] > test_selection_threshold)[:, 0]

        # Truncate (not round) each confidence to two decimal places.
        return {
            samples[i]["test_job"]["name"]: math.floor(probs[i, 1] * 100) / 100
            for i in chosen
        }

    data = {
        "tasks": get_runnables("label"),
        "groups": get_runnables("group"),
    }
    setkey(job.result_key, orjson.dumps(data))

    return "OK"