Example #1
def classify_bug(model_name, bug_ids, bugzilla_token):
    from bugbug_http.app import JobInfo

    # This should be called in a process worker, so it should be safe to set
    # the token here.
    bugzilla.set_token(bugzilla_token)

    bug_ids_set = set(map(int, bug_ids))
    bugs = bugzilla.get(bug_ids)

    missing_bugs = bug_ids_set.difference(bugs.keys())

    for bug_id in missing_bugs:
        job = JobInfo(classify_bug, model_name, bug_id)

        # TODO: Find a better error format
        encoded_data = json.dumps({"available": False})
        setkey(job.result_key, encoded_data)

    if not bugs:
        return "NOK"

    model = get_model(model_name)

    if not model:
        LOGGER.info("Missing model %r, aborting" % model_name)
        return "NOK"

    model_extra_data = model.get_extra_data()

    # TODO: Classify could choke on a single bug, which would make the whole
    # job fail. What should we do here?
    probs = model.classify(list(bugs.values()), True)
    indexes = probs.argmax(axis=-1)
    suggestions = model.le.inverse_transform(indexes)

    probs_list = probs.tolist()
    indexes_list = indexes.tolist()
    suggestions_list = suggestions.tolist()

    for i, bug_id in enumerate(bugs.keys()):
        data = {
            "prob": probs_list[i],
            "index": indexes_list[i],
            "class": suggestions_list[i],
            "extra_data": model_extra_data,
        }

        encoded_data = json.dumps(data)

        job = JobInfo(classify_bug, model_name, bug_id)
        setkey(job.result_key, encoded_data)

        # Save the bug's last change time
        setkey(job.change_time_key,
               bugs[bug_id]["last_change_time"],
               expiration=0)

    return "OK"
Example #2
def classify_issue(
    model_name: str, owner: str, repo: str, issue_nums: Sequence[int]
) -> str:
    from bugbug_http.app import JobInfo

    github = Github(owner=owner, repo=repo)

    issue_ids_set = set(map(int, issue_nums))

    issues = {
        issue_num: github.fetch_issue_by_number(owner, repo, issue_num, True)
        for issue_num in issue_nums
    }

    missing_issues = issue_ids_set.difference(issues.keys())

    for issue_id in missing_issues:
        job = JobInfo(classify_issue, model_name, owner, repo, issue_id)

        # TODO: Find a better error format
        setkey(job.result_key, orjson.dumps({"available": False}))

    if not issues:
        return "NOK"

    model = MODEL_CACHE.get(model_name)

    if not model:
        LOGGER.info("Missing model %r, aborting" % model_name)
        return "NOK"

    model_extra_data = model.get_extra_data()

    # TODO: Classify could choke on a single issue, which would make the whole
    # job fail. What should we do here?
    probs = model.classify(list(issues.values()), True)
    indexes = probs.argmax(axis=-1)
    suggestions = model.le.inverse_transform(indexes)

    probs_list = probs.tolist()
    indexes_list = indexes.tolist()
    suggestions_list = suggestions.tolist()

    for i, issue_id in enumerate(issues.keys()):
        data = {
            "prob": probs_list[i],
            "index": indexes_list[i],
            "class": suggestions_list[i],
            "extra_data": model_extra_data,
        }

        job = JobInfo(classify_issue, model_name, owner, repo, issue_id)
        setkey(job.result_key, orjson.dumps(data), compress=True)

        # Save the issue's last update time
        setkey(job.change_time_key, issues[issue_id]["updated_at"].encode())

    return "OK"
Example #3
def classify_bug(model_name: str, bug_ids: Sequence[int],
                 bugzilla_token: str) -> str:
    from bugbug_http.app import JobInfo

    # This should be called in a process worker, so it should be safe to set
    # the token here.
    bugzilla.set_token(bugzilla_token)

    bug_ids_set = set(map(int, bug_ids))

    bugs = {}
    for i in range(0, len(bug_ids), Bugzilla.BUGZILLA_CHUNK_SIZE):
        bugs.update(bugzilla.get(bug_ids[i:i + Bugzilla.BUGZILLA_CHUNK_SIZE]))

    missing_bugs = bug_ids_set.difference(bugs.keys())

    for bug_id in missing_bugs:
        job = JobInfo(classify_bug, model_name, bug_id)

        # TODO: Find a better error format
        setkey(job.result_key, orjson.dumps({"available": False}))

    if not bugs:
        return "NOK"

    model = MODEL_CACHE.get(model_name)

    if not model:
        LOGGER.info("Missing model %r, aborting" % model_name)
        return "NOK"

    model_extra_data = model.get_extra_data()

    # TODO: Classify could choke on a single bug, which would make the whole
    # job fail. What should we do here?
    probs = model.classify(list(bugs.values()), True)
    indexes = probs.argmax(axis=-1)
    suggestions = model.le.inverse_transform(indexes)

    probs_list = probs.tolist()
    indexes_list = indexes.tolist()
    suggestions_list = suggestions.tolist()

    for i, bug_id in enumerate(bugs.keys()):
        data = {
            "prob": probs_list[i],
            "index": indexes_list[i],
            "class": suggestions_list[i],
            "extra_data": model_extra_data,
        }

        job = JobInfo(classify_bug, model_name, bug_id)
        setkey(job.result_key, orjson.dumps(data), compress=True)

        # Save the bug's last change time
        setkey(job.change_time_key, bugs[bug_id]["last_change_time"].encode())

    return "OK"
Example #4
def schedule_tests(branch, rev):
    from bugbug_http.app import JobInfo

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug("Processing {job}")

    url = f"https://hg.mozilla.org/{branch}/json-automationrelevance/{rev}"
    r = requests.get(url)

    if r.status_code == 404:
        LOGGER.warning(f"Push not found at {url}!")
        return "NOK"

    first_rev = r.json()["changesets"][0]["node"]
    if first_rev != rev:
        revset = f"{first_rev}::{rev}"
    else:
        revset = rev

    # TODO Return real data based on 'revset'
    def get_data(revset):
        return {
            "tasks": ["test-macosx1014-64/debug-gtest-1proc"],
            "groups": [
                "caps/test/unit/xpcshell.ini",
                "dom/indexedDB/test/mochitest.ini",
            ],
        }

    data = get_data(revset)
    encoded_data = json.dumps(data)
    setkey(job.result_key, encoded_data)

    return "OK"
Example #5
def get_config_specific_groups(config: str) -> str:
    from bugbug_http.app import JobInfo

    job = JobInfo(get_config_specific_groups, config)
    LOGGER.info(f"Processing {job}...")

    testgroupselect_model = MODEL_CACHE.get("testgroupselect")
    equivalence_sets = testgroupselect_model._get_equivalence_sets(0.9)

    past_failures_data = test_scheduling.get_past_failures("group", True)
    all_runnables = past_failures_data["all_runnables"]

    setkey(
        job.result_key,
        orjson.dumps(
            [
                {"name": group}
                for group in all_runnables
                if any(
                    equivalence_set == {config}
                    for equivalence_set in equivalence_sets[group]
                )
            ]
        ),
        compress=True,
    )

    return "OK"
Example #6
def schedule_tests(branch, rev):
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug(f"Processing {job}")

    # Load the full stack of patches leading to that revision
    try:
        stack = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    # Apply the stack on the local repository
    try:
        revs = repository.apply_stack(REPO_DIR, stack, branch)
    except Exception as e:
        LOGGER.warning(f"Failed to apply stack {branch} @ {rev}: {e}")
        return "NOK"

    test_selection_threshold = float(
        os.environ.get("TEST_SELECTION_CONFIDENCE_THRESHOLD", 0.3))

    # Analyze patches.
    commits = repository.download_commits(REPO_DIR,
                                          revs=revs,
                                          save=False,
                                          use_single_process=True)

    tasks = MODEL_CACHE.get("testlabelselect").select_tests(
        commits, test_selection_threshold)

    reduced = MODEL_CACHE.get("testlabelselect").reduce(
        set(t for t, c in tasks.items() if c >= 0.7), 1.0)

    data = {
        "tasks": tasks,
        "groups": MODEL_CACHE.get("testgroupselect").select_tests(
            commits, test_selection_threshold
        ),
        "reduced_tasks": {t: c for t, c in tasks.items() if t in reduced},
    }
    setkey(job.result_key, orjson.dumps(data))

    return "OK"
Example #7
def schedule_tests(branch, rev):
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug(f"Processing {job}")

    # Load the full stack of patches leading to that revision
    try:
        stack = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    # Apply the stack on the local repository
    # Autoland should always rebase on top of parents, never on tip
    default_base = "tip" if branch != "integration/autoland" else None
    try:
        apply_stack(REPO_DIR, stack, branch, default_base)
    except Exception as e:
        LOGGER.warning(f"Failed to apply stack {branch} @ {rev}: {e}")
        return "NOK"

    first_rev = stack[0]["node"]
    if first_rev != rev:
        revset = f"{first_rev}::{rev}"
    else:
        revset = rev

    # TODO Return real data based on 'revset'
    def get_data(revset):
        return {
            "tasks": ["test-macosx1014-64/debug-gtest-1proc"],
            "groups": [
                "caps/test/unit/xpcshell.ini",
                "dom/indexedDB/test/mochitest.ini",
            ],
        }

    data = get_data(revset)
    encoded_data = json.dumps(data)
    setkey(job.result_key, encoded_data)

    return "OK"
Example #8
def schedule_tests(branch: str, rev: str) -> str:
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.info(f"Processing {job}...")

    # Pull the revision to the local repository
    LOGGER.info("Pulling commits from the remote repository...")
    repository.pull(REPO_DIR, branch, rev)

    # Load the full stack of patches leading to that revision
    LOGGER.info("Loading commits to analyze using automationrelevance...")
    try:
        revs = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    test_selection_threshold = float(
        os.environ.get("TEST_SELECTION_CONFIDENCE_THRESHOLD", 0.5)
    )

    # Analyze patches.
    commits = repository.download_commits(
        REPO_DIR, revs=revs, save=False, use_single_process=True, include_no_bug=True
    )

    if len(commits) > 0:
        testlabelselect_model = MODEL_CACHE.get("testlabelselect")
        testgroupselect_model = MODEL_CACHE.get("testgroupselect")

        tasks = testlabelselect_model.select_tests(commits, test_selection_threshold)

        reduced = testlabelselect_model.reduce(
            set(t for t, c in tasks.items() if c >= 0.8), 1.0
        )

        reduced_higher = testlabelselect_model.reduce(
            set(t for t, c in tasks.items() if c >= 0.9), 1.0
        )

        groups = testgroupselect_model.select_tests(commits, test_selection_threshold)

        config_groups = testgroupselect_model.select_configs(groups.keys(), 1.0)
    else:
        tasks = {}
        reduced = {}
        groups = {}
        config_groups = {}

    data = {
        "tasks": tasks,
        "groups": groups,
        "config_groups": config_groups,
        "reduced_tasks": {t: c for t, c in tasks.items() if t in reduced},
        "reduced_tasks_higher": {t: c for t, c in tasks.items() if t in reduced_higher},
        "known_tasks": get_known_tasks(),
    }
    setkey(job.result_key, orjson.dumps(data), compress=True)

    return "OK"
Example #9
def schedule_tests(branch, rev):
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug(f"Processing {job}")

    # Load the full stack of patches leading to that revision
    try:
        stack = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    # Apply the stack on the local repository
    try:
        revs = repository.apply_stack(REPO_DIR, stack, branch)
    except Exception as e:
        LOGGER.warning(f"Failed to apply stack {branch} @ {rev}: {e}")
        return "NOK"

    test_selection_threshold = float(
        os.environ.get("TEST_SELECTION_CONFIDENCE_THRESHOLD", 0.3))

    # Analyze patches.
    commits = repository.download_commits(REPO_DIR,
                                          revs=revs,
                                          save=False,
                                          use_single_process=True)

    commit_data = commit_features.merge_commits(commits)

    def get_runnables(granularity):
        past_failures_data = test_scheduling.get_past_failures(granularity)

        push_num = past_failures_data["push_num"]
        all_runnables = past_failures_data["all_runnables"]

        commit_tests = []
        for data in test_scheduling.generate_data(past_failures_data,
                                                  commit_data, push_num,
                                                  all_runnables, [], []):
            if granularity == "label" and not data["name"].startswith("test-"):
                continue

            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = MODEL_CACHE.get(f"test{granularity}select").classify(
            commit_tests, probabilities=True)
        selected_indexes = np.argwhere(
            probs[:, 1] > test_selection_threshold)[:, 0]
        return {
            commit_tests[i]["test_job"]["name"]:
            math.floor(probs[i, 1] * 100) / 100
            for i in selected_indexes
        }

    data = {
        "tasks": get_runnables("label"),
        "groups": get_runnables("group"),
    }
    setkey(job.result_key, orjson.dumps(data))

    return "OK"