Ejemplo n.º 1
0
    def get_bugbug_labels(self, kind='bug'):
        """Build a ``{bug_id: label}`` mapping for the 'bug' or 'regression' task.

        Labels are read from the 'bug_nobug' and 'regression_bug_nobug' label
        sets (the latter overriding the former for a same bug), then augmented
        with not-yet-labelled bugs whose keywords, regression range field or
        keyword history give their class away.

        Args:
            kind: Either 'bug' or 'regression'; selects how the categories of
                the label sets are turned into 0/1 labels.

        Returns:
            A dict mapping int bug IDs to 0 or 1, restricted to the bugs
            yielded by ``bugzilla.get_bugs()``.
        """
        assert kind in ['bug', 'regression']

        classes = {}

        for bug_id, category in labels.get_labels('bug_nobug'):
            assert category in ['True',
                                'False'], f'unexpected category {category}'
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category == 'True' else 0
            elif kind == 'regression':
                # Something which is not a bug cannot be a regression; a bug
                # might or might not be one, so 'True' tells us nothing here.
                if category == 'False':
                    classes[int(bug_id)] = 0

        for bug_id, category in labels.get_labels('regression_bug_nobug'):
            assert category in [
                'nobug', 'bug_unknown_regression', 'bug_no_regression',
                'regression'
            ], f'unexpected category {category}'
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category != 'nobug' else 0
            elif kind == 'regression':
                if category == 'bug_unknown_regression':
                    continue

                classes[int(bug_id)] = 1 if category == 'regression' else 0

        # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
        bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_id = int(bug['id'])

            bug_ids.add(bug_id)

            # Explicit labels always win over the heuristics below.
            if bug_id in classes:
                continue

            if any(keyword in bug['keywords']
                   for keyword in ['regression', 'talos-regression']) or (
                       'cf_has_regression_range' in bug
                       and bug['cf_has_regression_range'] == 'yes'):
                classes[bug_id] = 1
            elif any(keyword in bug['keywords'] for keyword in ['feature']):
                classes[bug_id] = 0
            elif kind == 'regression':
                for history in bug['history']:
                    for change in history['changes']:
                        if change['field_name'] != 'keywords':
                            continue

                        # The 'removed' value is a comma-separated list of
                        # keywords, so look for 'regression' among its items
                        # instead of requiring it to be the only one removed.
                        removed_keywords = [
                            k.strip() for k in change['removed'].split(',')
                        ]
                        if 'regression' in removed_keywords:
                            classes[bug_id] = 0

        # Remove labels which belong to bugs for which we have no data.
        return {
            bug_id: label
            for bug_id, label in classes.items() if bug_id in bug_ids
        }
Ejemplo n.º 2
0
    def get_labels(self):
        """Return a ``{bug_id: 0 or 1}`` mapping of tracking labels.

        The 'tracking' label set is loaded first. Changes to any
        ``cf_tracking_firefox*`` field found in a bug's history then override
        it, and remaining bugs default to 0 unless they were resolved as
        INVALID or DUPLICATE.
        """
        tracked = {}

        for raw_id, category in labels.get_labels("tracking"):
            assert category in ["True",
                                "False"], f"unexpected category {category}"
            tracked[int(raw_id)] = int(category == "True")

        for bug in bugzilla.get_bugs():
            number = int(bug["id"])

            for entry in bug["history"]:
                for change in entry["changes"]:
                    if not change["field_name"].startswith(
                            "cf_tracking_firefox"):
                        continue

                    # The last relevant change in the history wins.
                    if change["added"] in ["blocking", "+"]:
                        tracked[number] = 1
                    elif change["added"] == "-":
                        tracked[number] = 0

            # Unlabelled bugs count as "not tracked", except for invalid and
            # duplicate ones, which are left out.
            if bug["resolution"] not in ["INVALID", "DUPLICATE"]:
                tracked.setdefault(number, 0)

        return tracked
Ejemplo n.º 3
0
    def get_labels(self):
        """Label commits as regressors (1) or non-regressors (0).

        Commits which were ever backed out are skipped. Commits listed in the
        'regressor' label set get 1; the others get 0, provided their push
        date is neither before 2016-11-01 nor within the last six months.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps commit nodes
            to their label.
        """
        classes = {}

        regressors = {r[0] for r in labels.get_labels("regressor")}

        # The labels we have are only from 2016-11-01.
        # TODO: Automate collection of labels and somehow remove this check.
        labels_start = datetime(2016, 11, 1)

        # We remove the last 6 months, as there could be regressions which haven't been filed yet.
        # Computed once, so that all commits are compared against the same cutoff.
        recent_cutoff = datetime.utcnow() - relativedelta(months=6)

        for commit_data in repository.get_commits():
            if commit_data["ever_backedout"]:
                continue

            node = commit_data["node"]
            if node in regressors:
                classes[node] = 1
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])
            if push_date < labels_start or push_date > recent_cutoff:
                continue

            classes[node] = 0

        print("{} commits caused regressions".format(
            sum(1 for label in classes.values() if label == 1)))

        print("{} commits did not cause regressions".format(
            sum(1 for label in classes.values() if label == 0)))

        return classes, [0, 1]
Ejemplo n.º 4
0
    def get_labels(self):
        """Compute tracking labels, as a dict from bug ID to 0 or 1.

        Entries of the 'tracking' label set come first; they are overridden by
        the values written to ``cf_tracking_firefox*`` fields in the history of
        each bug. Bugs left without a label get 0, unless their resolution is
        INVALID or DUPLICATE.
        """
        by_bug = {}

        for raw_id, category in labels.get_labels('tracking'):
            assert category in ['True', 'False'], f'unexpected category {category}'
            if category == 'True':
                by_bug[int(raw_id)] = 1
            else:
                by_bug[int(raw_id)] = 0

        for bug in bugzilla.get_bugs():
            bug_number = int(bug['id'])

            # Walk all the changes of the bug, in history order.
            all_changes = (
                change
                for entry in bug['history']
                for change in entry['changes']
            )
            for change in all_changes:
                if not change['field_name'].startswith('cf_tracking_firefox'):
                    continue

                if change['added'] in ['blocking', '+']:
                    by_bug[bug_number] = 1
                elif change['added'] == '-':
                    by_bug[bug_number] = 0

            if bug['resolution'] in ['INVALID', 'DUPLICATE']:
                continue

            # No explicit label and no tracking flag change: not tracked.
            by_bug.setdefault(bug_number, 0)

        return by_bug
Ejemplo n.º 5
0
    def get_labels(self):
        """Label commits as ignorable (1) or not ignorable (0).

        Commits that were backed out are skipped. Commits flagged as
        ``ignored`` get 1, commits attached to a bug involved in a regression
        get 0, and the 'annotateignore' label set finally overrides both.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps commit nodes
            to their label.
        """
        classes = {}

        # Commits in regressor or regression bugs usually are not formatting changes.
        # A set comprehension is used instead of `sum(lists, [])`, which copies
        # the accumulated list at every step and is thus quadratic.
        regression_related_bugs = {
            related_bug_id
            for bug in bugzilla.get_bugs()
            for related_bug_id in bug["regressed_by"] + bug["regressions"]
        }

        for commit_data in repository.get_commits(include_ignored=True):
            if commit_data["backedoutby"]:
                continue

            node = commit_data["node"]

            if commit_data["ignored"]:
                classes[node] = 1
            elif commit_data["bug_id"] in regression_related_bugs:
                classes[node] = 0

        # Manual annotations take precedence over the heuristics above.
        for node, label in labels.get_labels("annotateignore"):
            classes[node] = int(label)

        print("{} commits that can be ignored".format(
            sum(1 for label in classes.values() if label == 1)))

        print("{} commits that cannot be ignored".format(
            sum(1 for label in classes.values() if label == 0)))

        return classes, [0, 1]
Ejemplo n.º 6
0
    def get_labels(self):
        """Compute the tracking labels and the list of possible classes.

        The 'tracking' label set provides the initial labels, which are then
        overridden by the values set on ``cf_tracking_firefox*`` fields in the
        history of each bug. Bugs which remain unlabelled get 0, except those
        resolved as INVALID or DUPLICATE.

        Returns:
            A ``(labels, [0, 1])`` tuple, where ``labels`` maps int bug IDs to
            0 or 1.
        """
        outcome = {}

        for raw_id, category in labels.get_labels("tracking"):
            assert category in ["True", "False"], f"unexpected category {category}"
            outcome[int(raw_id)] = 1 if category == "True" else 0

        for bug in bugzilla.get_bugs():
            key = int(bug["id"])

            for entry in bug["history"]:
                for change in entry["changes"]:
                    # Only changes to the tracking flags are relevant.
                    if not change["field_name"].startswith("cf_tracking_firefox"):
                        continue

                    new_value = change["added"]
                    if new_value in ["blocking", "+"]:
                        outcome[key] = 1
                    elif new_value == "-":
                        outcome[key] = 0

            if bug["resolution"] in ["INVALID", "DUPLICATE"] or key in outcome:
                continue

            outcome[key] = 0

        return outcome, [0, 1]
Ejemplo n.º 7
0
    def get_commits_to_ignore(self) -> None:
        """Compute the commits to ignore and publish them as IGNORED_COMMITS_DB.

        A commit is ignored when it is flagged as ignored, was backed out, has
        no bug, is itself a backout, is a wpt sync commit, or is annotated with
        "1" in the 'annotateignore' label set. Commits mentioned as backed out
        by a backout are ignored too, as long as they exist in the repository.

        The resulting list is written to IGNORED_COMMITS_DB, compressed and
        uploaded.

        Raises:
            AssertionError: If the commits DB could not be downloaded.
        """
        # The download must not live inside an `assert` statement: assertions
        # are removed under `python -O`, which would silently skip it.
        if not db.download(repository.COMMITS_DB):
            raise AssertionError(
                f"Failed to download {repository.COMMITS_DB}")

        ignored = set()
        commits_to_ignore = []
        all_commits = set()

        annotate_ignore_nodes = {
            node
            for node, label in labels.get_labels("annotateignore")
            if label == "1"
        }

        for commit in repository.get_commits(include_no_bug=True,
                                             include_backouts=True,
                                             include_ignored=True):
            # Short (12 characters) hashes are used for all membership checks.
            all_commits.add(commit["node"][:12])

            if (commit["ignored"] or commit["backedoutby"]
                    or not commit["bug_id"] or commit["backsout"]
                    or repository.is_wptsync(commit)
                    or commit["node"] in annotate_ignore_nodes):
                commits_to_ignore.append({
                    "rev":
                    commit["node"],
                    "type":
                    "backedout" if commit["backedoutby"] else "",
                })
                ignored.add(commit["node"][:12])

            # Also ignore the commits this one backs out, unless they were
            # already recorded.
            for backedout in commit["backsout"]:
                if backedout[:12] in ignored:
                    continue
                ignored.add(backedout[:12])

                commits_to_ignore.append({
                    "rev": backedout,
                    "type": "backedout"
                })

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        # Skip backed-out commits which aren't in the repository (commits which landed *before* the Mercurial history
        # started, and backouts which mentioned a bad hash in their message).
        commits_to_ignore = [
            c for c in commits_to_ignore if c["rev"][:12] in all_commits
        ]

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        logger.info("...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore
                if commit["type"] == "backedout")))

        db.write(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)
        db.upload(IGNORED_COMMITS_DB)
Ejemplo n.º 8
0
    def get_labels(self):
        """Return a dict from bug ID to 'd', 'f' or 't'.

        Bugs labelled 1 by ``get_bugbug_labels('bug')`` are marked as 'd'; the
        'defect_feature_task' label set is applied afterwards and overrides
        them. The number of bugs of each type is printed.
        """
        bug_labels = self.get_bugbug_labels('bug')

        types = {}
        for bug_id, is_bug in bug_labels.items():
            if is_bug == 1:
                types[bug_id] = 'd'

        for raw_id, bug_type in labels.get_labels('defect_feature_task'):
            assert bug_type in ['d', 'f', 't']
            types[int(raw_id)] = bug_type

        assigned = list(types.values())
        print('{} defects'.format(assigned.count('d')))
        print('{} features'.format(assigned.count('f')))
        print('{} tasks'.format(assigned.count('t')))

        return types
Ejemplo n.º 9
0
    def get_bugbug_labels(self, kind="bug"):
        """Collect the labels for the requested classification task.

        The hand-made label sets are read first, each one overriding the ones
        read before it. Bugs which are still unlabelled at that point are then
        labelled from their keywords, their history and their type field.

        Args:
            kind: One of "bug", "regression" or "defect_enhancement_task".

        Returns:
            A dict from int bug ID to label (0 or 1 for "bug" and "regression",
            "defect", "enhancement" or "task" for "defect_enhancement_task"),
            limited to the bugs yielded by ``bugzilla.get_bugs()``.
        """
        assert kind in ["bug", "regression", "defect_enhancement_task"]

        result = {}

        # "Is it a bug?" annotations.
        for raw_id, category in labels.get_labels("bug_nobug"):
            assert category in ["True",
                                "False"], f"unexpected category {category}"
            if kind == "bug":
                result[int(raw_id)] = 1 if category == "True" else 0
            elif kind == "regression" and category == "False":
                result[int(raw_id)] = 0
            elif kind == "defect_enhancement_task" and category == "True":
                result[int(raw_id)] = "defect"

        # "Is it a regression, another bug, or not a bug?" annotations.
        for raw_id, category in labels.get_labels("regression_bug_nobug"):
            assert category in [
                "nobug",
                "bug_unknown_regression",
                "bug_no_regression",
                "regression",
            ], f"unexpected category {category}"
            if kind == "bug":
                result[int(raw_id)] = 0 if category == "nobug" else 1
            elif kind == "regression":
                # Bugs of unknown regression status give no regression label.
                if category != "bug_unknown_regression":
                    result[int(raw_id)] = 1 if category == "regression" else 0
            elif kind == "defect_enhancement_task" and category != "nobug":
                result[int(raw_id)] = "defect"

        # Four separate label sets for the same task.
        votes_e = dict(labels.get_labels("defect_enhancement_task_e"))
        votes_p = dict(labels.get_labels("defect_enhancement_task_p"))
        votes_s = dict(labels.get_labels("defect_enhancement_task_s"))
        votes_h = dict(labels.get_labels("defect_enhancement_task_h"))

        def agreed_labels():
            # Keep the labels from the "p" set which are not contradicted by
            # any of the other sets.
            for raw_id, category in votes_p.items():
                if all(raw_id not in other or other[raw_id] == category
                       for other in (votes_e, votes_s, votes_h)):
                    yield raw_id, category

        for raw_id, category in itertools.chain(
                labels.get_labels("defect_enhancement_task"),
                agreed_labels()):
            assert category in ["defect", "enhancement", "task"]
            if kind == "bug":
                result[int(raw_id)] = 1 if category == "defect" else 0
            elif kind == "regression":
                if category in ["enhancement", "task"]:
                    result[int(raw_id)] = 0
            elif kind == "defect_enhancement_task":
                result[int(raw_id)] = category

        # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
        # And also use the new bug type field.
        known_bugs = set()
        for bug in bugzilla.get_bugs():
            bug_id = int(bug["id"])
            known_bugs.add(bug_id)

            # Hand-made labels are never overridden by the heuristics.
            if bug_id in result:
                continue

            keywords = bug["keywords"]
            if ("regression" in keywords or "talos-regression" in keywords
                    or ("cf_has_regression_range" in bug
                        and bug["cf_has_regression_range"] == "yes")):
                result[bug_id] = 1 if kind in ["bug", "regression"] else "defect"
            elif "feature" in keywords:
                result[bug_id] = (0 if kind in ["bug", "regression"] else
                                  "enhancement")
            elif kind == "regression":
                # Replay the keyword changes: the last addition or removal of
                # the 'regression' keyword decides.
                for entry in bug["history"]:
                    for change in entry["changes"]:
                        if change["field_name"] != "keywords":
                            continue

                        if "regression" in [
                                k.strip()
                                for k in change["removed"].split(",")
                        ]:
                            result[bug_id] = 0
                        elif "regression" in [
                                k.strip() for k in change["added"].split(",")
                        ]:
                            result[bug_id] = 1

            # We can use the type as a label for all bugs after the migration (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they are not defects.
            can_use_type = bool(bug["id"] > 1_540_807)
            # The conditions to use the 'defect' type are more restricted.
            can_use_defect_type = False

            # And we can use the type as a label for bugs whose type has been modified.
            # For 'defects', we can't use them as labels unless resulting from a change, because bugs are filed by default as 'defect' and so they could be mistakes.
            if not can_use_type or bug["type"] == "defect":
                for entry in bug["history"]:
                    for change in entry["changes"]:
                        if change["field_name"] == "type":
                            can_use_type = True
                            can_use_defect_type = True

            if not can_use_type:
                continue

            bug_type = bug["type"]
            if bug_type == "enhancement" or bug_type == "task":
                if kind in ("bug", "regression"):
                    result[bug_id] = 0
                elif kind == "defect_enhancement_task":
                    result[bug_id] = bug_type
            elif bug_type == "defect" and can_use_defect_type:
                # A defect is not necessarily a regression, hence no label for
                # the "regression" kind.
                if kind == "bug":
                    result[bug_id] = 1
                elif kind == "defect_enhancement_task":
                    result[bug_id] = "defect"

        # Remove labels which belong to bugs for which we have no data.
        return {
            bug_id: label
            for bug_id, label in result.items() if bug_id in known_bugs
        }
Ejemplo n.º 10
0
    def get_bugbug_labels(self, kind='bug'):
        """Collect the labels for the requested classification task.

        The hand-made label sets are read first, each one overriding the ones
        read before it. Bugs which are still unlabelled at that point are then
        labelled from their keywords, their history and their type field.

        Args:
            kind: One of 'bug', 'regression' or 'defect_enhancement_task'.

        Returns:
            A dict from int bug ID to label (0 or 1 for 'bug' and 'regression',
            'defect', 'enhancement' or 'task' for 'defect_enhancement_task'),
            limited to the bugs yielded by ``bugzilla.get_bugs()``.
        """
        assert kind in ['bug', 'regression', 'defect_enhancement_task']

        classes = {}

        for bug_id, category in labels.get_labels('bug_nobug'):
            assert category in ['True',
                                'False'], f'unexpected category {category}'
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category == 'True' else 0
            elif kind == 'regression':
                if category == 'False':
                    classes[int(bug_id)] = 0
            elif kind == 'defect_enhancement_task':
                if category == 'True':
                    classes[int(bug_id)] = 'defect'

        for bug_id, category in labels.get_labels('regression_bug_nobug'):
            assert category in [
                'nobug', 'bug_unknown_regression', 'bug_no_regression',
                'regression'
            ], f'unexpected category {category}'
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category != 'nobug' else 0
            elif kind == 'regression':
                if category == 'bug_unknown_regression':
                    continue

                classes[int(bug_id)] = 1 if category == 'regression' else 0
            elif kind == 'defect_enhancement_task':
                if category != 'nobug':
                    classes[int(bug_id)] = 'defect'

        defect_enhancement_task_e = {
            bug_id: category
            for bug_id, category in labels.get_labels(
                'defect_enhancement_task_e')
        }
        defect_enhancement_task_p = {
            bug_id: category
            for bug_id, category in labels.get_labels(
                'defect_enhancement_task_p')
        }
        defect_enhancement_task_s = {
            bug_id: category
            for bug_id, category in labels.get_labels(
                'defect_enhancement_task_s')
        }

        # Labels from the 'p' set which are not contradicted by the 'e' and
        # 's' sets.
        defect_enhancement_task_common = (
            (bug_id, category)
            for bug_id, category in defect_enhancement_task_p.items()
            if (bug_id not in defect_enhancement_task_e
                or defect_enhancement_task_e[bug_id] ==
                defect_enhancement_task_p[bug_id]) and (
                    bug_id not in defect_enhancement_task_s
                    or defect_enhancement_task_s[bug_id] ==
                    defect_enhancement_task_p[bug_id]))

        for bug_id, category in itertools.chain(
                labels.get_labels('defect_enhancement_task'),
                defect_enhancement_task_common):
            assert category in ['d', 'e', 't']
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category == 'd' else 0
            elif kind == 'regression':
                if category in ['e', 't']:
                    classes[int(bug_id)] = 0
            elif kind == 'defect_enhancement_task':
                # Expand the one-letter codes used by these label sets.
                if category == 'd':
                    classes[int(bug_id)] = 'defect'
                elif category == 'e':
                    classes[int(bug_id)] = 'enhancement'
                elif category == 't':
                    classes[int(bug_id)] = 'task'

        # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
        # And also use the new bug type field.
        bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_id = int(bug['id'])

            bug_ids.add(bug_id)

            # Hand-made labels are never overridden by the heuristics.
            if bug_id in classes:
                continue

            if any(keyword in bug['keywords']
                   for keyword in ['regression', 'talos-regression']) or (
                       'cf_has_regression_range' in bug
                       and bug['cf_has_regression_range'] == 'yes'):
                if kind in ['bug', 'regression']:
                    classes[bug_id] = 1
                else:
                    classes[bug_id] = 'defect'
            elif any(keyword in bug['keywords'] for keyword in ['feature']):
                if kind in ['bug', 'regression']:
                    classes[bug_id] = 0
                else:
                    classes[bug_id] = 'enhancement'
            elif kind == 'regression':
                for history in bug['history']:
                    for change in history['changes']:
                        if change['field_name'] != 'keywords':
                            continue

                        # Strip each item, so that 'regression' is also found
                        # when the list is separated by ', ' rather than ','.
                        if 'regression' in [
                                k.strip()
                                for k in change['removed'].split(',')
                        ]:
                            classes[bug_id] = 0
                        elif 'regression' in [
                                k.strip() for k in change['added'].split(',')
                        ]:
                            classes[bug_id] = 1

            # The conditions to use the 'defect' type are more restricted.
            can_use_type = False
            can_use_defect_type = False

            # We can use the type as a label for all bugs after the migration (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they are not defects.
            # The already normalised integer ID is used for the comparison.
            if bug_id > 1540807:
                can_use_type = True

            # And we can use the type as a label for bugs whose type has been modified.
            # For 'defects', we can't use them as labels unless resulting from a change, because bugs are filed by default as 'defect' and so they could be mistakes.
            if not can_use_type or bug['type'] == 'defect':
                for history in bug['history']:
                    for change in history['changes']:
                        if change['field_name'] == 'type':
                            can_use_type = can_use_defect_type = True

            if can_use_type:
                if bug['type'] == 'enhancement':
                    if kind == 'bug':
                        classes[bug_id] = 0
                    elif kind == 'regression':
                        classes[bug_id] = 0
                    elif kind == 'defect_enhancement_task':
                        classes[bug_id] = 'enhancement'
                elif bug['type'] == 'task':
                    if kind == 'bug':
                        classes[bug_id] = 0
                    elif kind == 'regression':
                        classes[bug_id] = 0
                    elif kind == 'defect_enhancement_task':
                        classes[bug_id] = 'task'
                elif bug['type'] == 'defect' and can_use_defect_type:
                    # A defect is not necessarily a regression, hence no label
                    # for the 'regression' kind.
                    if kind == 'bug':
                        classes[bug_id] = 1
                    elif kind == 'defect_enhancement_task':
                        classes[bug_id] = 'defect'

        # Remove labels which belong to bugs for which we have no data.
        return {
            bug_id: label
            for bug_id, label in classes.items() if bug_id in bug_ids
        }
Ejemplo n.º 11
0
    def get_bugbug_labels(self, kind='bug'):
        """Collect the labels for the requested classification task.

        The hand-made label sets are read first, each one overriding the ones
        read before it. Bugs which are still unlabelled at that point are then
        labelled from their keywords and their keyword history.

        Args:
            kind: One of 'bug', 'regression' or 'defect_feature_task'.

        Returns:
            A dict from int bug ID to label (0 or 1 for 'bug' and 'regression',
            'd', 'e' or 't' for 'defect_feature_task'), limited to the bugs
            yielded by ``bugzilla.get_bugs()``.
        """
        assert kind in ['bug', 'regression', 'defect_feature_task']

        classes = {}

        for bug_id, category in labels.get_labels('bug_nobug'):
            assert category in ['True',
                                'False'], f'unexpected category {category}'
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category == 'True' else 0
            elif kind == 'regression':
                if category == 'False':
                    classes[int(bug_id)] = 0
            elif kind == 'defect_feature_task':
                if category == 'True':
                    classes[int(bug_id)] = 'd'

        for bug_id, category in labels.get_labels('regression_bug_nobug'):
            assert category in [
                'nobug', 'bug_unknown_regression', 'bug_no_regression',
                'regression'
            ], f'unexpected category {category}'
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category != 'nobug' else 0
            elif kind == 'regression':
                if category == 'bug_unknown_regression':
                    continue

                classes[int(bug_id)] = 1 if category == 'regression' else 0
            elif kind == 'defect_feature_task':
                if category != 'nobug':
                    classes[int(bug_id)] = 'd'

        defect_feature_task_e = {
            bug_id: category
            for bug_id, category in labels.get_labels('defect_feature_task_e')
        }
        defect_feature_task_p = {
            bug_id: category
            for bug_id, category in labels.get_labels('defect_feature_task_p')
        }
        defect_feature_task_s = {
            bug_id: category
            for bug_id, category in labels.get_labels('defect_feature_task_s')
        }

        # Labels from the 'p' set which are not contradicted by the 'e' and
        # 's' sets.
        defect_feature_task_common = (
            (bug_id, category)
            for bug_id, category in defect_feature_task_p.items()
            if (bug_id not in defect_feature_task_e or
                defect_feature_task_e[bug_id] == defect_feature_task_p[bug_id])
            and
            (bug_id not in defect_feature_task_s or
             defect_feature_task_s[bug_id] == defect_feature_task_p[bug_id]))

        for bug_id, category in itertools.chain(
                labels.get_labels('defect_feature_task'),
                defect_feature_task_common):
            assert category in ['d', 'e', 't']
            if kind == 'bug':
                classes[int(bug_id)] = 1 if category == 'd' else 0
            elif kind == 'regression':
                if category in ['e', 't']:
                    classes[int(bug_id)] = 0
            elif kind == 'defect_feature_task':
                classes[int(bug_id)] = category

        # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
        bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_id = int(bug['id'])

            bug_ids.add(bug_id)

            # Hand-made labels are never overridden by the heuristics.
            if bug_id in classes:
                continue

            if any(keyword in bug['keywords']
                   for keyword in ['regression', 'talos-regression']) or (
                       'cf_has_regression_range' in bug
                       and bug['cf_has_regression_range'] == 'yes'):
                if kind in ['bug', 'regression']:
                    classes[bug_id] = 1
                else:
                    classes[bug_id] = 'd'
            elif any(keyword in bug['keywords'] for keyword in ['feature']):
                if kind in ['bug', 'regression']:
                    classes[bug_id] = 0
                else:
                    classes[bug_id] = 'e'
            elif kind == 'regression':
                for history in bug['history']:
                    for change in history['changes']:
                        if change['field_name'] != 'keywords':
                            continue

                        # Strip each item, so that 'regression' is also found
                        # when the list is separated by ', ' rather than ','.
                        if 'regression' in [
                                k.strip()
                                for k in change['removed'].split(',')
                        ]:
                            classes[bug_id] = 0
                        elif 'regression' in [
                                k.strip() for k in change['added'].split(',')
                        ]:
                            classes[bug_id] = 1

        # Remove labels which belong to bugs for which we have no data.
        return {
            bug_id: label
            for bug_id, label in classes.items() if bug_id in bug_ids
        }
Ejemplo n.º 12
0
    def get_bugbug_labels(self, kind="bug"):
        """Collect the labels for the requested classification task.

        The hand-made label sets are read first, each one overriding the ones
        read before it. Bugs which are still unlabelled at that point are then
        labelled from their keywords, their history and their type field.

        Args:
            kind: One of "bug", "regression" or "defect_enhancement_task".

        Returns:
            A dict from int bug ID to label (0 or 1 for "bug" and "regression",
            "defect", "enhancement" or "task" for "defect_enhancement_task"),
            limited to the bugs yielded by ``bugzilla.get_bugs()``.
        """
        assert kind in ["bug", "regression", "defect_enhancement_task"]

        classes = {}

        for bug_id, category in labels.get_labels("bug_nobug"):
            assert category in ["True", "False"], f"unexpected category {category}"
            if kind == "bug":
                classes[int(bug_id)] = 1 if category == "True" else 0
            elif kind == "regression":
                if category == "False":
                    classes[int(bug_id)] = 0
            elif kind == "defect_enhancement_task":
                if category == "True":
                    classes[int(bug_id)] = "defect"

        for bug_id, category in labels.get_labels("regression_bug_nobug"):
            assert category in [
                "nobug",
                "bug_unknown_regression",
                "bug_no_regression",
                "regression",
            ], f"unexpected category {category}"
            if kind == "bug":
                classes[int(bug_id)] = 1 if category != "nobug" else 0
            elif kind == "regression":
                if category == "bug_unknown_regression":
                    continue

                classes[int(bug_id)] = 1 if category == "regression" else 0
            elif kind == "defect_enhancement_task":
                if category != "nobug":
                    classes[int(bug_id)] = "defect"

        defect_enhancement_task_e = {
            bug_id: category
            for bug_id, category in labels.get_labels("defect_enhancement_task_e")
        }
        defect_enhancement_task_p = {
            bug_id: category
            for bug_id, category in labels.get_labels("defect_enhancement_task_p")
        }
        defect_enhancement_task_s = {
            bug_id: category
            for bug_id, category in labels.get_labels("defect_enhancement_task_s")
        }
        defect_enhancement_task_h = {
            bug_id: category
            for bug_id, category in labels.get_labels("defect_enhancement_task_h")
        }

        # Labels from the "p" set which are not contradicted by the "e", "s"
        # and "h" sets.
        defect_enhancement_task_common = (
            (bug_id, category)
            for bug_id, category in defect_enhancement_task_p.items()
            if (
                bug_id not in defect_enhancement_task_e
                or defect_enhancement_task_e[bug_id]
                == defect_enhancement_task_p[bug_id]
            )
            and (
                bug_id not in defect_enhancement_task_s
                or defect_enhancement_task_s[bug_id]
                == defect_enhancement_task_p[bug_id]
            )
            and (
                bug_id not in defect_enhancement_task_h
                or defect_enhancement_task_h[bug_id]
                == defect_enhancement_task_p[bug_id]
            )
        )

        for bug_id, category in itertools.chain(
            labels.get_labels("defect_enhancement_task"), defect_enhancement_task_common
        ):
            assert category in ["defect", "enhancement", "task"]
            if kind == "bug":
                classes[int(bug_id)] = 1 if category == "defect" else 0
            elif kind == "regression":
                if category in ["enhancement", "task"]:
                    classes[int(bug_id)] = 0
            elif kind == "defect_enhancement_task":
                classes[int(bug_id)] = category

        # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
        # And also use the new bug type field.
        bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_id = int(bug["id"])

            bug_ids.add(bug_id)

            # Hand-made labels are never overridden by the heuristics.
            if bug_id in classes:
                continue

            if any(
                keyword in bug["keywords"]
                for keyword in ["regression", "talos-regression"]
            ) or (
                "cf_has_regression_range" in bug
                and bug["cf_has_regression_range"] == "yes"
            ):
                if kind in ["bug", "regression"]:
                    classes[bug_id] = 1
                else:
                    classes[bug_id] = "defect"
            elif any(keyword in bug["keywords"] for keyword in ["feature"]):
                if kind in ["bug", "regression"]:
                    classes[bug_id] = 0
                else:
                    classes[bug_id] = "enhancement"
            elif kind == "regression":
                for history in bug["history"]:
                    for change in history["changes"]:
                        if change["field_name"] != "keywords":
                            continue

                        # Strip each item, so that "regression" is also found
                        # when the list is separated by ", " rather than ",".
                        if "regression" in [
                            k.strip() for k in change["removed"].split(",")
                        ]:
                            classes[bug_id] = 0
                        elif "regression" in [
                            k.strip() for k in change["added"].split(",")
                        ]:
                            classes[bug_id] = 1

            # The conditions to use the 'defect' type are more restricted.
            can_use_type = False
            can_use_defect_type = False

            # We can use the type as a label for all bugs after the migration (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they are not defects.
            # The already normalised integer ID is used for the comparison.
            if bug_id > 1_540_807:
                can_use_type = True

            # And we can use the type as a label for bugs whose type has been modified.
            # For 'defects', we can't use them as labels unless resulting from a change, because bugs are filed by default as 'defect' and so they could be mistakes.
            if not can_use_type or bug["type"] == "defect":
                for history in bug["history"]:
                    for change in history["changes"]:
                        if change["field_name"] == "type":
                            can_use_type = can_use_defect_type = True

            if can_use_type:
                if bug["type"] == "enhancement":
                    if kind == "bug":
                        classes[bug_id] = 0
                    elif kind == "regression":
                        classes[bug_id] = 0
                    elif kind == "defect_enhancement_task":
                        classes[bug_id] = "enhancement"
                elif bug["type"] == "task":
                    if kind == "bug":
                        classes[bug_id] = 0
                    elif kind == "regression":
                        classes[bug_id] = 0
                    elif kind == "defect_enhancement_task":
                        classes[bug_id] = "task"
                elif bug["type"] == "defect" and can_use_defect_type:
                    # A defect is not necessarily a regression, hence no label
                    # for the "regression" kind.
                    if kind == "bug":
                        classes[bug_id] = 1
                    elif kind == "defect_enhancement_task":
                        classes[bug_id] = "defect"

        # Remove labels which belong to bugs for which we have no data.
        return {bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids}