def retrieve_bugs(self):
    """Download the bug corpus from Bugzilla and compress it.

    Fetches every bug filed between 2.5 years ago and 6 months ago, plus
    all labelled bugs, then re-downloads any bugs whose local snapshot is
    inconsistent (up to three passes) before compressing data/bugs.json.
    """
    bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

    six_months_ago = datetime.utcnow() - relativedelta(months=6)
    two_years_and_six_months_ago = six_months_ago - relativedelta(years=2)

    # Lazy %-style logging args: the message is only formatted if emitted.
    logger.info('Downloading bugs from %s to %s',
                two_years_and_six_months_ago, six_months_ago)
    bugzilla.download_bugs_between(two_years_and_six_months_ago, six_months_ago)

    logger.info('Downloading labelled bugs')
    bug_ids = labels.get_all_bug_ids()
    bugzilla.download_bugs(bug_ids)

    # Try to re-download inconsistent bugs, up to three times.
    for _ in range(3):
        bug_ids = bug_snapshot.get_inconsistencies()
        if not bug_ids:
            break

        logger.info('Re-downloading %d bugs, as they were inconsistent',
                    len(bug_ids))
        bugzilla.delete_bugs(bug_ids)
        bugzilla.download_bugs(bug_ids)

    self.compress_file('data/bugs.json')
def retrieve_bugs(self):
    """Fetch the Bugzilla bug corpus and compress the resulting database.

    Downloads every bug filed in the two-year window ending six months ago,
    adds all labelled bugs, retries inconsistent snapshots (at most three
    passes), and finally compresses data/bugs.json.
    """
    bugzilla.set_token(get_secret("BUGZILLA_TOKEN"))

    window_end = datetime.utcnow() - relativedelta(months=6)
    window_start = window_end - relativedelta(years=2)

    logger.info(
        "Downloading bugs from {} to {}".format(window_start, window_end)
    )
    bugzilla.download_bugs_between(window_start, window_end)

    logger.info("Downloading labelled bugs")
    bugzilla.download_bugs(labels.get_all_bug_ids())

    # Re-fetch bugs whose local snapshots disagree with Bugzilla; give up
    # after three passes.
    remaining_attempts = 3
    while remaining_attempts:
        remaining_attempts -= 1
        inconsistent = bug_snapshot.get_inconsistencies()
        if not inconsistent:
            break

        logger.info(
            f"Re-downloading {len(inconsistent)} bugs, as they were inconsistent"
        )
        bugzilla.delete_bugs(inconsistent)
        bugzilla.download_bugs(inconsistent)

    self.compress_file("data/bugs.json")
def retrieve_bugs(self):
    """Download the bug corpus (~2.5 years ago to ~6 months ago) plus all
    labelled bugs, then compress data/bugs.json."""
    bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

    six_months_ago = datetime.utcnow() - timedelta(182)
    # BUG FIX: the name promises "two years and six months ago", but the
    # original subtracted only timedelta(365) — one year.  The sibling
    # relativedelta-based variant of this method uses years=2, confirming
    # the intended window; subtract two years' worth of days.
    two_years_and_six_months_ago = six_months_ago - timedelta(365 * 2)

    bugzilla.download_bugs_between(two_years_and_six_months_ago, six_months_ago)

    bug_ids = labels.get_all_bug_ids()
    bugzilla.download_bugs(bug_ids)

    self.compress_file('data/bugs.json')
def go(self):
    """Run one evaluation pass: fetch the trained models and last week's
    bugs, evaluate the three classifiers, and index the task result.
    """
    # Download models that were trained by bugbug_train.
    # The three fetches run concurrently; each done-callback decompresses
    # the corresponding .xz artifact once its download finishes.
    with ThreadPoolExecutorResult(max_workers=3) as executor:
        f1 = executor.submit(lambda: urlretrieve(
            'https://index.taskcluster.net/v1/task/project.releng.services.project.testing.bugbug_train.latest/artifacts/public/bug.model.xz',
            'bug.model.xz'))  # noqa
        f1.add_done_callback(lambda f: self.decompress_file('bug.model'))

        f2 = executor.submit(lambda: urlretrieve(
            'https://index.taskcluster.net/v1/task/project.releng.services.project.testing.bugbug_train.latest/artifacts/public/regression.model.xz',
            'regression.model.xz'))  # noqa
        f2.add_done_callback(
            lambda f: self.decompress_file('regression.model'))

        f3 = executor.submit(lambda: urlretrieve(
            'https://index.taskcluster.net/v1/task/project.releng.services.project.testing.bugbug_train.latest/artifacts/public/tracking.model.xz',
            'tracking.model.xz'))  # noqa
        f3.add_done_callback(
            lambda f: self.decompress_file('tracking.model'))

    # Download bugs from the last week that we want to analyze.
    bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

    today = datetime.utcnow()
    one_week_ago = today - timedelta(7)
    bugzilla.download_bugs_between(one_week_ago, today)

    # Eval classifier for bug-vs-nonbug.
    self.eval_bug()

    # Eval classifier for regression-vs-nonregression.
    self.eval_regression()

    # Eval classifier for tracking bugs.
    self.eval_tracking()

    # Index the task in the TaskCluster index.
    # rank 0 and an empty data payload appear to be the service's fixed
    # convention here; expires is 31 days out in ISO-8601 with Z suffix.
    self.index_service.insertTask(
        'project.releng.services.project.{}.bugbug_eval.latest'.format(
            secrets[secrets.APP_CHANNEL]),
        {
            'taskId': os.environ['TASK_ID'],
            'rank': 0,
            'data': {},
            'expires': (datetime.utcnow() + timedelta(31)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        })
def retrieve_bugs(self):
    """Download the bug corpus (~2.5 years ago to ~6 months ago) plus all
    labelled bugs, then compress data/bugs.json."""
    bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

    six_months_ago = datetime.utcnow() - timedelta(182)
    # BUG FIX: the name promises "two years and six months ago", but the
    # original subtracted only timedelta(365) — one year.  The sibling
    # relativedelta-based variant of this method uses years=2, confirming
    # the intended window; subtract two years' worth of days.
    two_years_and_six_months_ago = six_months_ago - timedelta(365 * 2)

    logger.info('Downloading bugs from {} to {}'.format(
        two_years_and_six_months_ago, six_months_ago))
    bugzilla.download_bugs_between(two_years_and_six_months_ago, six_months_ago)

    logger.info('Downloading labelled bugs')
    bug_ids = labels.get_all_bug_ids()
    bugzilla.download_bugs(bug_ids)

    self.compress_file('data/bugs.json')
def fetch_untriaged(args):
    """Download bugs filed in the last ``args.days_back`` days and return
    those that were ever moved out of the Untriaged component.

    The matching bugs are also dumped to a ``bugs-<timestamp>.json`` file.
    """
    from bugbug import bugzilla

    today = date.today()
    # Renamed from the misleading "three_months_ago": the window is
    # whatever args.days_back says, not a fixed three months.
    start_date = today - timedelta(days=args.days_back)

    # Set bugzilla token and download bugs
    bugzilla.set_token(args.token)
    bug_ids = bugzilla.download_bugs_between(start_date, today)
    # O(1) membership tests instead of scanning a list per bug.
    bug_id_set = set(bug_ids)

    # Get untriaged bugs.  any() also fixes the original defect of
    # appending the same bug once per matching history change, which
    # produced duplicates in the output.
    untriaged_bugs = []
    for bug in bugzilla.get_bugs():
        if bug['id'] not in bug_id_set:
            continue
        if any(
            change['field_name'] == 'component' and change['removed'] == 'Untriaged'
            for history in bug['history']
            for change in history['changes']
        ):
            untriaged_bugs.append(bug)

    # NOTE(review): '%s' is a platform-specific strftime directive (epoch
    # seconds on glibc, unsupported on Windows) — kept for compatibility.
    with open('bugs-{}.json'.format(datetime.now().strftime('%s')), 'w') as f:
        json.dump(untriaged_bugs, f)

    return untriaged_bugs
def fetch_untriaged(args):
    """Download bugs filed in the last ``args.days_back`` days and return
    those that were ever moved out of the Untriaged component.

    The matching bugs are also dumped to a ``bugs-<timestamp>.json`` file.
    """
    from bugbug import bugzilla

    today = date.today()
    # Renamed from the misleading "three_months_ago": the window is
    # whatever args.days_back says, not a fixed three months.
    start_date = today - timedelta(days=args.days_back)

    # Set bugzilla token and download bugs
    bugzilla.set_token(args.token)
    bug_ids = bugzilla.download_bugs_between(start_date, today)
    # O(1) membership tests instead of scanning a list per bug.
    bug_id_set = set(bug_ids)

    # Get untriaged bugs.  any() also fixes the original defect of
    # appending the same bug once per matching history change, which
    # produced duplicates in the output.
    untriaged_bugs = []
    for bug in bugzilla.get_bugs():
        if bug["id"] not in bug_id_set:
            continue
        if any(
            change["field_name"] == "component" and change["removed"] == "Untriaged"
            for history in bug["history"]
            for change in history["changes"]
        ):
            untriaged_bugs.append(bug)

    # NOTE(review): "%s" is a platform-specific strftime directive (epoch
    # seconds on glibc, unsupported on Windows) — kept for compatibility.
    with open("bugs-{}.json".format(datetime.now().strftime("%s")), "w") as f:
        json.dump(untriaged_bugs, f)

    return untriaged_bugs
def go(self):
    """Run one evaluation pass: fetch the trained models and last week's
    bugs, evaluate the three classifiers, and index the task result.
    """
    # Download models that were trained by bugbug_train.  The original
    # repeated the submit/callback pair three times, differing only in the
    # model name; drive the fetches from a list instead.
    base_url = 'https://index.taskcluster.net/v1/task/project.releng.services.project.testing.bugbug_train.latest/artifacts/public'  # noqa
    with ThreadPoolExecutorResult(max_workers=3) as executor:
        for model_name in ('bugmodel', 'regressionmodel', 'trackingmodel'):
            # Bind the loop variable as a lambda default: lambdas created
            # in a loop would otherwise all see its final value (late
            # binding).
            future = executor.submit(
                lambda name=model_name: urlretrieve(
                    '{}/{}.xz'.format(base_url, name), '{}.xz'.format(name)))
            future.add_done_callback(
                lambda f, name=model_name: self.decompress_file(name))

    # Download bugs from the last week that we want to analyze.
    bugzilla.set_token(secrets[secrets.BUGZILLA_TOKEN])

    today = datetime.utcnow()
    one_week_ago = today - timedelta(7)
    bugzilla.download_bugs_between(one_week_ago, today)

    # Eval classifier for bug-vs-nonbug.
    self.eval_bug()

    # Eval classifier for regression-vs-nonregression.
    self.eval_regression()

    # Eval classifier for tracking bugs.
    self.eval_tracking()

    # Index the task in the TaskCluster index.
    self.index_service.insertTask(
        'project.releng.services.project.{}.bugbug_eval.latest'.format(secrets[secrets.APP_CHANNEL]),
        {
            'taskId': os.environ['TASK_ID'],
            'rank': 0,
            'data': {},
            'expires': (datetime.utcnow() + timedelta(31)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        }
    )
# NOTE(review): this is a fragment of a larger classification CLI — `model`,
# `importances`, `probas`, `args` and `bugzilla` are defined outside this
# view.  The first half presumably runs once per classified bug; confirm the
# enclosing loop/indentation against the full file.
feature_names = model.get_feature_names()
# Each importance entry is (importance, feature_index, is_positive);
# print a ranked, signed feature list.
for i, (importance, index, is_positive) in enumerate(importances):
    print(
        f'{i + 1}. \'{feature_names[int(index)]}\' ({"+" if (is_positive) else "-"}{importance})'
    )
# Binary verdict from the class-probability vector.
if np.argmax(probas) == 1:
    print(f'Positive! {probas}')
else:
    print(f'Negative! {probas}')
# Pause so a human can review each bug's verdict before continuing.
input()
if args.generate_sheet:
    # Classify the last week of bugs and collect rows for a labelling sheet.
    today = datetime.utcnow()
    a_week_ago = today - timedelta(7)
    bug_ids = bugzilla.download_bugs_between(a_week_ago, today)
    print(f'Classifying {len(bug_ids)} bugs...')
    rows = [['Bug', f'{args.goal}(model)', args.goal, 'Title']]
    for bug in bugzilla.get_bugs():
        if bug['id'] not in bug_ids:
            continue
        p = model.classify(bug, probabilities=True)
        # 0.7 threshold on the positive-class probability decides 'y'/'n';
        # the empty column is left for the human label.
        rows.append([
            f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}',
            'y' if p[0][1] >= 0.7 else 'n',
            '',
            bug['summary']
        ])
def main(args):
    """Entry point: train or load the model selected by ``args.goal``,
    then optionally classify all bugs interactively and/or generate a
    CSV labelling sheet for the last week of bugs.
    """
    model_file_name = "{}{}model".format(
        args.goal, "" if args.classifier == "default" else args.classifier
    )

    # "component" has a neural-network variant selected by --classifier;
    # every other goal maps directly to a model class name.
    if args.goal == "component":
        if args.classifier == "default":
            model_class_name = "component"
        else:
            model_class_name = "component_nn"
    else:
        model_class_name = args.goal

    model_class = get_model_class(model_class_name)

    if args.train:
        db.download(bugzilla.BUGS_DB)
        db.download(repository.COMMITS_DB)

        # Goals whose model constructor accepts a `historical` flag.
        historical_supported_tasks = [
            "defect",
            "bugtype",
            "defectenhancementtask",
            "regression",
        ]

        if args.goal in historical_supported_tasks:
            model = model_class(args.lemmatization, args.historical)
        elif args.goal == "duplicate":
            model = model_class(args.training_set_size, args.lemmatization)
        else:
            model = model_class(args.lemmatization)
        model.train()
    else:
        model = model_class.load(model_file_name)

    if args.classify:
        for bug in bugzilla.get_bugs():
            print(
                f'https://bugzilla.mozilla.org/show_bug.cgi?id={ bug["id"] } - { bug["summary"]} '
            )

            if model.calculate_importance:
                probas, importance = model.classify(
                    bug, probabilities=True, importances=True
                )

                feature_names = model.get_feature_names()
                # Loop variable renamed from "importance", which shadowed
                # the importance dict being iterated.
                for i, imp_value, in enumerate(importance["importances"]):
                    importance_score, index, is_positive = imp_value
                    print(
                        f'{i + 1}. \'{feature_names[int(index)]}\' ({"+" if (is_positive) else "-"}{importance_score})'
                    )
            else:
                probas = model.classify(bug, probabilities=True, importances=False)

            if np.argmax(probas) == 1:
                print(f"Positive! {probas}")
            else:
                # BUG FIX: in the original source this f-string literal was
                # split across a physical newline, a syntax error.
                print(f"Negative! {probas}")
            # Pause for human review between bugs.
            input()

    if args.generate_sheet:
        assert (
            args.token is not None
        ), "A Bugzilla token should be set in order to download bugs"

        today = datetime.utcnow()
        a_week_ago = today - timedelta(7)
        bugzilla.set_token(args.token)
        bugs = bugzilla.download_bugs_between(a_week_ago, today)

        print(f"Classifying {len(bugs)} bugs...")

        rows = [["Bug", f"{args.goal}(model)", args.goal, "Title"]]

        for bug in bugs:
            p = model.classify(bug, probabilities=True)
            # 0.7 threshold on the positive-class probability decides the
            # model's 'y'/'n' column; the empty column is for human labels.
            rows.append(
                [
                    f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}',
                    "y" if p[0][1] >= 0.7 else "n",
                    "",
                    bug["summary"],
                ]
            )

        os.makedirs("sheets", exist_ok=True)
        with open(
            os.path.join(
                "sheets",
                f'{args.goal}-{datetime.utcnow().strftime("%Y-%m-%d")}-labels.csv',
            ),
            "w",
        ) as f:
            writer = csv.writer(f)
            writer.writerows(rows)
# Fragment of a duplicate-detection evaluation script: loads the trained
# duplicate model, builds (or reuses a cached) test-bug set, classifies all
# bug pairs, and writes predictions to CSV.  `json`, `datetime`, `timedelta`,
# `itertools` and `csv` must be imported earlier in the full file.
from bugbug import bugzilla
from bugbug.models.duplicate import DuplicateModel

m = DuplicateModel.load("duplicatemodel")

# NOTE(review): both entries are masked placeholders and are currently the
# identical string, so this set has a single element — confirm the real
# addresses in the unredacted source.
REPORTERS_TO_IGNORE = {"*****@*****.**", "*****@*****.**"}

# Reuse a cached test set if present; otherwise download the last three
# weeks of bugs (store=False keeps them out of the local DB), drop ignored
# reporters, and cache the result.
try:
    with open("duplicate_test_bugs.json", "r") as f:
        test_bugs = json.load(f)
except FileNotFoundError:
    test_bugs = bugzilla.download_bugs_between(
        datetime.now() - timedelta(days=21), datetime.now(), store=False
    )
    test_bugs = [
        bug for bug in test_bugs if not bug["creator"] in REPORTERS_TO_IGNORE
    ]
    with open("duplicate_test_bugs.json", "w") as f:
        json.dump(test_bugs, f)

# Every unordered pair of test bugs is a candidate duplicate — O(n^2) pairs.
bug_tuples = list(itertools.combinations(test_bugs, 2))

# Warning: Classifying all the test bugs takes a while
probs = m.classify(bug_tuples, probabilities=True)

# NOTE(review): fragment ends here — the rows written with `spamwriter`
# continue beyond this view.
with open("duplicate_predictions.csv", "w") as csvfile:
    spamwriter = csv.writer(csvfile)
# NOTE(review): fragment of a larger classification CLI — `model`, `bug`,
# `args` and `bugzilla` are defined outside this view; the first statement
# presumably sits in an else-branch of a calculate_importance check.
probas = model.classify(bug, probabilities=True, importances=False)

# Binary verdict from the class-probability vector.
if np.argmax(probas) == 1:
    print(f"Positive! {probas}")
else:
    print(f"Negative! {probas}")
# Pause so a human can review each verdict before continuing.
input()

if args.generate_sheet:
    assert (
        args.token is not None
    ), "A Bugzilla token should be set in order to download bugs"

    # Classify the last week of bugs and collect rows for a labelling sheet.
    today = datetime.utcnow()
    a_week_ago = today - timedelta(7)
    bugzilla.set_token(args.token)
    bugs = bugzilla.download_bugs_between(a_week_ago, today)

    print(f"Classifying {len(bugs)} bugs...")

    rows = [["Bug", f"{args.goal}(model)", args.goal, "Title"]]

    for bug in bugs:
        p = model.classify(bug, probabilities=True)
        # 0.7 threshold on the positive-class probability decides 'y'/'n';
        # the empty column is left for the human label.
        rows.append(
            [
                f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]}',
                "y" if p[0][1] >= 0.7 else "n",
                "",
                bug["summary"],
            ]
        )