Example #1
0
    def retrieve_bugs(self):
        """Fetch every bug needed for training and compress the result.

        Downloads bugs filed between two and a half years ago and six
        months ago, plus all manually labelled bugs, then re-fetches any
        bugs whose snapshots are inconsistent (at most three passes).
        """
        bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

        # Window: from two and a half years ago up to six months ago.
        end = datetime.utcnow() - relativedelta(months=6)
        start = end - relativedelta(years=2)
        logger.info('Downloading bugs from {} to {}'.format(start, end))
        bugzilla.download_bugs_between(start, end)

        logger.info('Downloading labelled bugs')
        bugzilla.download_bugs(labels.get_all_bug_ids())

        # Re-download inconsistent bugs, giving up after three passes.
        for _ in range(3):
            inconsistent = bug_snapshot.get_inconsistencies()
            if not inconsistent:
                break

            logger.info(
                f'Re-downloading {len(inconsistent)} bugs, as they were inconsistent'
            )
            bugzilla.delete_bugs(inconsistent)
            bugzilla.download_bugs(inconsistent)

        self.compress_file('data/bugs.json')
Example #2
0
    def retrieve_bugs(self):
        """Fetch every bug needed for training and compress the result.

        Downloads bugs filed between two and a half years ago and six
        months ago, plus all manually labelled bugs, then re-fetches any
        bugs whose snapshots are inconsistent (at most three passes).
        """
        bugzilla.set_token(get_secret("BUGZILLA_TOKEN"))

        # Window: from two and a half years ago up to six months ago.
        end = datetime.utcnow() - relativedelta(months=6)
        start = end - relativedelta(years=2)
        logger.info("Downloading bugs from {} to {}".format(start, end))
        bugzilla.download_bugs_between(start, end)

        logger.info("Downloading labelled bugs")
        bugzilla.download_bugs(labels.get_all_bug_ids())

        # Re-download inconsistent bugs, giving up after three passes.
        for _ in range(3):
            inconsistent = bug_snapshot.get_inconsistencies()
            if not inconsistent:
                break

            logger.info(
                f"Re-downloading {len(inconsistent)} bugs, as they were inconsistent"
            )
            bugzilla.delete_bugs(inconsistent)
            bugzilla.download_bugs(inconsistent)

        self.compress_file("data/bugs.json")
Example #3
0
    def retrieve_bugs(self):
        """Download the bug data needed for training and compress it.

        Fetches all bugs filed between two and a half years ago and six
        months ago, plus every bug referenced by the manual labels, then
        compresses the resulting database.
        """
        bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

        six_months_ago = datetime.utcnow() - timedelta(182)
        # Two years before the six-month cutoff. The previous value of 365
        # days only spanned one year, contradicting the variable name (and
        # the 2-year window used elsewhere in this project).
        two_years_and_six_months_ago = six_months_ago - timedelta(730)
        bugzilla.download_bugs_between(two_years_and_six_months_ago,
                                       six_months_ago)

        bug_ids = labels.get_all_bug_ids()
        bugzilla.download_bugs(bug_ids)

        self.compress_file('data/bugs.json')
Example #4
0
    def go(self):
        """Evaluate the last week of bugs against the trained models.

        Downloads the three models produced by bugbug_train, fetches last
        week's bugs, runs the three evaluations, and records the task in
        the TaskCluster index.
        """
        train_index = ('https://index.taskcluster.net/v1/task/'
                       'project.releng.services.project.testing'
                       '.bugbug_train.latest/artifacts/public')

        # Fetch the three trained models concurrently, decompressing each
        # one as soon as its download completes.
        with ThreadPoolExecutorResult(max_workers=3) as executor:
            for name in ('bug', 'regression', 'tracking'):
                archive = f'{name}.model.xz'
                future = executor.submit(
                    lambda a=archive: urlretrieve(f'{train_index}/{a}', a))  # noqa
                # Default-bind `name`: the callback may fire after the loop
                # has moved on to the next iteration.
                future.add_done_callback(
                    lambda f, n=name: self.decompress_file(f'{n}.model'))

        # Download bugs from the last week that we want to analyze.
        bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

        today = datetime.utcnow()
        bugzilla.download_bugs_between(today - timedelta(7), today)

        # Run each evaluation.
        self.eval_bug()          # bug-vs-nonbug
        self.eval_regression()   # regression-vs-nonregression
        self.eval_tracking()     # tracking bugs

        # Index the task in the TaskCluster index; the entry expires in a
        # month.
        route = ('project.releng.services.project.'
                 f'{secrets[secrets.APP_CHANNEL]}.bugbug_eval.latest')
        self.index_service.insertTask(route, {
            'taskId': os.environ['TASK_ID'],
            'rank': 0,
            'data': {},
            'expires': (datetime.utcnow() +
                        timedelta(31)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        })
Example #5
0
    def retrieve_bugs(self):
        """Download the bug data needed for training and compress it.

        Fetches all bugs filed between two and a half years ago and six
        months ago, plus every bug referenced by the manual labels, then
        compresses the resulting database.
        """
        bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

        six_months_ago = datetime.utcnow() - timedelta(182)
        # Two years before the six-month cutoff. The previous value of 365
        # days only spanned one year, contradicting the variable name (and
        # the 2-year window used elsewhere in this project).
        two_years_and_six_months_ago = six_months_ago - timedelta(730)
        logger.info('Downloading bugs from {} to {}'.format(
            two_years_and_six_months_ago, six_months_ago))
        bugzilla.download_bugs_between(two_years_and_six_months_ago,
                                       six_months_ago)

        logger.info('Downloading labelled bugs')
        bug_ids = labels.get_all_bug_ids()
        bugzilla.download_bugs(bug_ids)

        self.compress_file('data/bugs.json')
Example #6
0
def fetch_untriaged(args):
    """Return recently-filed bugs that have left the Untriaged component.

    Downloads every bug filed in the last ``args.days_back`` days, keeps
    those with a history entry removing them from 'Untriaged', dumps them
    to a timestamped JSON file, and returns the list.
    """
    from bugbug import bugzilla

    today = date.today()
    three_months_ago = today - timedelta(days=args.days_back)

    # Set bugzilla token and download bugs
    bugzilla.set_token(args.token)
    bug_ids = bugzilla.download_bugs_between(three_months_ago, today)

    # A bug is kept once per history entry that moved it out of
    # 'Untriaged' (so a bug can appear more than once, as before).
    untriaged_bugs = [
        bug
        for bug in bugzilla.get_bugs()
        if bug['id'] in bug_ids
        for history in bug['history']
        for change in history['changes']
        if change['field_name'] == 'component'
        and change['removed'] == 'Untriaged'
    ]

    # Timestamped dump so repeated runs don't overwrite each other.
    out_name = 'bugs-{}.json'.format(datetime.now().strftime('%s'))
    with open(out_name, 'w') as f:
        json.dump(untriaged_bugs, f)

    return untriaged_bugs
Example #7
0
def fetch_untriaged(args):
    """Return recently-filed bugs that have left the Untriaged component.

    Downloads every bug filed in the last ``args.days_back`` days, keeps
    those with a history entry removing them from "Untriaged", dumps them
    to a timestamped JSON file, and returns the list.
    """
    from bugbug import bugzilla

    today = date.today()
    three_months_ago = today - timedelta(days=args.days_back)

    # Set bugzilla token and download bugs
    bugzilla.set_token(args.token)
    bug_ids = bugzilla.download_bugs_between(three_months_ago, today)

    # A bug is kept once per history entry that moved it out of
    # "Untriaged" (so a bug can appear more than once, as before).
    untriaged_bugs = [
        bug
        for bug in bugzilla.get_bugs()
        if bug["id"] in bug_ids
        for history in bug["history"]
        for change in history["changes"]
        if change["field_name"] == "component"
        and change["removed"] == "Untriaged"
    ]

    # Timestamped dump so repeated runs don't overwrite each other.
    out_name = "bugs-{}.json".format(datetime.now().strftime("%s"))
    with open(out_name, "w") as f:
        json.dump(untriaged_bugs, f)

    return untriaged_bugs
Example #8
0
    def go(self):
        """Evaluate the last week of bugs against the trained models.

        Downloads the three models produced by bugbug_train, fetches last
        week's bugs, runs the three evaluations, and records the task in
        the TaskCluster index.
        """
        train_index = ('https://index.taskcluster.net/v1/task/'
                       'project.releng.services.project.testing'
                       '.bugbug_train.latest/artifacts/public')

        # Fetch the three trained models concurrently, decompressing each
        # one as soon as its download completes.
        with ThreadPoolExecutorResult(max_workers=3) as executor:
            for name in ('bug', 'regression', 'tracking'):
                archive = f'{name}model.xz'
                future = executor.submit(
                    lambda a=archive: urlretrieve(f'{train_index}/{a}', a))  # noqa
                # Default-bind `name`: the callback may fire after the loop
                # has moved on to the next iteration.
                future.add_done_callback(
                    lambda f, n=name: self.decompress_file(f'{n}model'))

        # Download bugs from the last week that we want to analyze.
        bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

        today = datetime.utcnow()
        bugzilla.download_bugs_between(today - timedelta(7), today)

        # Run each evaluation.
        self.eval_bug()          # bug-vs-nonbug
        self.eval_regression()   # regression-vs-nonregression
        self.eval_tracking()     # tracking bugs

        # Index the task in the TaskCluster index; the entry expires in a
        # month.
        route = ('project.releng.services.project.'
                 f'{secrets[secrets.APP_CHANNEL]}.bugbug_eval.latest')
        self.index_service.insertTask(route, {
            'taskId': os.environ['TASK_ID'],
            'rank': 0,
            'data': {},
            'expires': (datetime.utcnow() +
                        timedelta(31)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        })
Example #9
0
            # Show each feature's contribution to the prediction, ranked.
            feature_names = model.get_feature_names()
            for i, (importance, index, is_positive) in enumerate(importances):
                print(
                    f'{i + 1}. \'{feature_names[int(index)]}\' ({"+" if (is_positive) else "-"}{importance})'
                )

            # Class index 1 is treated as the positive class.
            if np.argmax(probas) == 1:
                print(f'Positive! {probas}')
            else:
                print(f'Negative! {probas}')
            # Pause until the user presses Enter before the next bug.
            input()

    if args.generate_sheet:
        # Classify all bugs filed in the last week and collect the
        # predictions as spreadsheet rows.
        today = datetime.utcnow()
        a_week_ago = today - timedelta(7)
        bug_ids = bugzilla.download_bugs_between(a_week_ago, today)

        print(f'Classifying {len(bug_ids)} bugs...')

        # Header row; one data row per bug follows.
        rows = [['Bug', f'{args.goal}(model)', args.goal, 'Title']]

        for bug in bugzilla.get_bugs():
            # Skip bugs outside the one-week window just downloaded.
            if bug['id'] not in bug_ids:
                continue

            p = model.classify(bug, probabilities=True)
            # 'y' when the positive-class probability is at least 0.7; the
            # empty column is presumably for a manually-entered label
            # (matches the `args.goal` header) — TODO confirm.
            rows.append([
                f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}',
                'y' if p[0][1] >= 0.7 else 'n', '', bug['summary']
            ])
Example #10
0
def main(args):
    """Train or load a bug model and optionally classify recent bugs.

    Depending on the CLI flags this trains a model (``--train``),
    interactively classifies every known bug (``--classify``), and/or
    dumps a CSV of predictions for last week's bugs
    (``--generate-sheet``).
    """
    # The "default" classifier is not encoded in the model file name.
    model_file_name = "{}{}model".format(
        args.goal, "" if args.classifier == "default" else args.classifier)

    # "component" has a neural-network variant; every other goal maps
    # directly to a model class of the same name.
    if args.goal == "component":
        if args.classifier == "default":
            model_class_name = "component"
        else:
            model_class_name = "component_nn"
    else:
        model_class_name = args.goal

    model_class = get_model_class(model_class_name)

    if args.train:
        db.download(bugzilla.BUGS_DB)
        db.download(repository.COMMITS_DB)

        # Goals whose model constructor accepts the `historical` flag.
        historical_supported_tasks = [
            "defect",
            "bugtype",
            "defectenhancementtask",
            "regression",
        ]

        if args.goal in historical_supported_tasks:
            model = model_class(args.lemmatization, args.historical)
        elif args.goal == "duplicate":
            model = model_class(args.training_set_size, args.lemmatization)
        else:
            model = model_class(args.lemmatization)
        model.train()
    else:
        model = model_class.load(model_file_name)

    if args.classify:
        for bug in bugzilla.get_bugs():
            print(
                f'https://bugzilla.mozilla.org/show_bug.cgi?id={ bug["id"] } - { bug["summary"]} '
            )

            if model.calculate_importance:
                probas, importance = model.classify(bug,
                                                    probabilities=True,
                                                    importances=True)

                feature_names = model.get_feature_names()
                # Use a distinct loop variable (`imp`): the original code
                # rebound `importance`, shadowing the dict being iterated.
                for i, (imp, index,
                        is_positive) in enumerate(importance["importances"]):
                    print(
                        f'{i + 1}. \'{feature_names[int(index)]}\' ({"+" if (is_positive) else "-"}{imp})'
                    )
            else:
                probas = model.classify(bug,
                                        probabilities=True,
                                        importances=False)

            # Class index 1 is treated as the positive class.
            if np.argmax(probas) == 1:
                print(f"Positive! {probas}")
            else:
                print(f"Negative! {probas}")
            # Pause until the user presses Enter before the next bug.
            input()

    if args.generate_sheet:
        # A token is required because a fresh download happens below.
        assert (args.token is not None
                ), "A Bugzilla token should be set in order to download bugs"
        today = datetime.utcnow()
        a_week_ago = today - timedelta(7)
        bugzilla.set_token(args.token)
        bugs = bugzilla.download_bugs_between(a_week_ago, today)

        print(f"Classifying {len(bugs)} bugs...")

        # Header row; one data row per classified bug follows.
        rows = [["Bug", f"{args.goal}(model)", args.goal, "Title"]]

        for bug in bugs:
            p = model.classify(bug, probabilities=True)
            # "y" when the positive-class probability is at least 0.7; the
            # empty column is presumably for a manually-entered label
            # (matches the `args.goal` header) — TODO confirm.
            rows.append([
                f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}',
                "y" if p[0][1] >= 0.7 else "n",
                "",
                bug["summary"],
            ])

        os.makedirs("sheets", exist_ok=True)
        with open(
                os.path.join(
                    "sheets",
                    f'{args.goal}-{datetime.utcnow().strftime("%Y-%m-%d")}-labels.csv',
                ),
                "w",
        ) as f:
            writer = csv.writer(f)
            writer.writerows(rows)
Example #11
0
from bugbug import bugzilla
from bugbug.models.duplicate import DuplicateModel

# Load the previously-trained duplicate-detection model.
m = DuplicateModel.load("duplicatemodel")

# Bugs filed by these (redacted) reporters are excluded from the test set.
REPORTERS_TO_IGNORE = {
    "*****@*****.**", "*****@*****.**"
}

# Reuse a cached test set when available; otherwise download the last
# three weeks of bugs (store=False — presumably skips persisting them to
# the local DB; verify against bugzilla.download_bugs_between), filter
# out ignored reporters, and cache the result for subsequent runs.
try:
    with open("duplicate_test_bugs.json", "r") as f:
        test_bugs = json.load(f)
except FileNotFoundError:
    test_bugs = bugzilla.download_bugs_between(datetime.now() -
                                               timedelta(days=21),
                                               datetime.now(),
                                               store=False)
    test_bugs = [
        bug for bug in test_bugs if not bug["creator"] in REPORTERS_TO_IGNORE
    ]
    with open("duplicate_test_bugs.json", "w") as f:
        json.dump(test_bugs, f)

# Every unordered pair of test bugs is a candidate duplicate pair.
bug_tuples = list(itertools.combinations(test_bugs, 2))

# Warning: Classifying all the test bugs takes a while
probs = m.classify(bug_tuples, probabilities=True)

# Write one prediction row per pair (the writer's use continues below).
with open("duplicate_predictions.csv", "w") as csvfile:
    spamwriter = csv.writer(csvfile)
Example #12
0
                # No importance breakdown requested; probabilities only.
                probas = model.classify(bug, probabilities=True, importances=False)

            # Class index 1 is treated as the positive class.
            if np.argmax(probas) == 1:
                print(f"Positive! {probas}")
            else:
                print(f"Negative! {probas}")
            # Pause until the user presses Enter before the next bug.
            input()

    if args.generate_sheet:
        # A token is required because a fresh download happens below.
        assert (
            args.token is not None
        ), "A Bugzilla token should be set in order to download bugs"
        today = datetime.utcnow()
        a_week_ago = today - timedelta(7)
        bugzilla.set_token(args.token)
        bugs = bugzilla.download_bugs_between(a_week_ago, today)

        print(f"Classifying {len(bugs)} bugs...")

        # Header row; one data row per classified bug follows.
        rows = [["Bug", f"{args.goal}(model)", args.goal, "Title"]]

        for bug in bugs:
            p = model.classify(bug, probabilities=True)
            # "y" when the positive-class probability is at least 0.7; the
            # empty column is presumably for a manually-entered label
            # (matches the `args.goal` header) — TODO confirm.
            rows.append(
                [
                    f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}',
                    "y" if p[0][1] >= 0.7 else "n",
                    "",
                    bug["summary"],
                ]
            )