def fetch_bugs(self):
    """Fetch all Bugzilla bugs between the cached bounds, with a progress bar."""
    logger.info('Fetching Bugzilla bugs ...')
    lower, upper = self._get_bounds()
    if lower > upper:
        logger.info('Found cache that says bugs already fetched. Skipping.')
        return
    total = upper - lower
    failed = 0
    completed = 0

    def on_result(bug):
        # Result callback handed to self._run. `bug` is a fetched bug,
        # the sentinel string 'failed', or a falsy placeholder that only
        # advances the progress bar.
        nonlocal failed, completed
        if bug == 'failed':
            failed += 1
        elif bug:
            self._on_bug(bug)
            completed += 1
        # Failed fetches shrink the effective total so the bar can finish.
        print_progressbar(completed, total - failed)

    print_progressbar(0, total)
    self._run(lower, upper, on_result)
    logger.info('\n--> completed with ' + str(failed) + ' failed bug fetches: ')
def write(bug):
    # Result callback (closure): `x`, `total` and `self` come from the
    # enclosing scope -- TODO confirm against the defining function.
    # `bug` is a fetched bug object, the sentinel string 'failed', or a
    # falsy placeholder that only refreshes the progress bar.
    if bug == 'failed':
        x['failed'] += 1
    elif bug:
        self._on_bug(bug)
        x['completed'] += 1
    # Redraw after every result; failures reduce the effective total.
    print_progressbar(x['completed'], total - x['failed'])
def evaluate(self, prediction_func, test_indices, prefix=''):
    """Score *prediction_func* over the given test indices.

    For each test case it asks `prediction_func` for a ranked list of
    candidates, records top-1..top-5 hit counts, and accumulates the
    estimated fix-time cost of following the best of the top-k
    recommendations versus the actual outcome.

    Returns:
        (predictions_x, predictions_y) -- the ranked prediction lists and
        the hit rank, recorded only for cases where the true target
        appeared in the prediction at all.
    """
    top_5_acc = [0.0, 0.0, 0.0, 0.0, 0.0]  # hit counters for top-1..top-5
    ft_change = [0.0, 0.0, 0.0, 0.0, 0.0]  # accumulated cost when trusting top-1..top-5
    total_ft = 0                           # accumulated actual fix time (baseline)
    predictions_x = []
    predictions_y = []
    total = len(test_indices)
    print('evaluating ' + prefix + ' ...')
    for count, index in enumerate(test_indices):
        print_progressbar(count, total)
        test_x = self._c['data'][index]
        test_y = self._c['target'][index]
        # Ranked candidate list; assumed to have at least len(ft_change)
        # entries -- TODO confirm with prediction_func implementations.
        prediction = prediction_func(test_x)
        if test_y in prediction:
            hit = prediction.index(test_y)
            predictions_x.append(prediction)
            predictions_y.append(hit)
            # A hit at rank `hit` counts toward every top-k with k > hit.
            for i in range(hit, len(top_5_acc)):
                top_5_acc[i] += 1.0
        fix_time = self._ft_estimator.actual_ft(test_x['bug_id'])
        total_ft += fix_time
        lowest_ft = None
        for _index in range(len(ft_change)):
            estimation = self._ft_estimator.predict(
                prediction[_index], test_x['vector'])
            change = fix_time
            if _index == 0:
                lowest_ft = change
            if estimation and prediction[_index] != test_y:
                # Presumably models a mis-recommendation penalty: pay the
                # actual fix time again minus the candidate's estimated
                # fix time -- NOTE(review): verify against the estimator.
                change += fix_time - estimation
            # lowest_ft carries the minimum cost over ranks 0.._index, so
            # ft_change[k] reflects "pick the best of the top k+1".
            if change < lowest_ft:
                lowest_ft = change
            ft_change[_index] += lowest_ft
    top_5_acc_perc = [
        str(round(Decimal(acc / total), 2)) for acc in top_5_acc
    ]
    fix_time_perc = [
        str(round(Decimal(1 - ft / total_ft), 2)) for ft in ft_change
    ]
    print('\n' + prefix + ' accuracy: ' + ', '.join(top_5_acc_perc))
    print(prefix + ' fix time reduction [%]: ' + ', '.join(fix_time_perc))
    return predictions_x, predictions_y
def update(self):
    """Backfill e-mail addresses for stored bugs.

    Scans every fully-stored bug and, when the assignee field looks like a
    parenthesised list -- e.g. '(alice@example.com,bob@example.com)' --
    extracts the first address and writes it back via the storage layer.
    """
    bugs = self._storage.get_existing_full()
    total_length = len(bugs)
    for index, bug in enumerate(bugs):
        print_progressbar(index, total_length)
        assignee = bug['assignee_email']
        # startswith/endswith are safe on the empty string, unlike the
        # previous [0]/[-1] indexing which raised IndexError.
        if assignee.startswith('(') and assignee.endswith(')'):
            # Strip both parentheses before splitting; dropping only the
            # leading '(' used to leave a trailing ')' whenever the list
            # contained a single address.
            email = assignee[1:-1].split(',')[0]
            self._storage.update_github(bug['id'], email)
def fetch_all(self):
    """Import every bug-labelled GitHub issue, oldest-updated first."""
    repo = self._g.get_repo(self._owner + '/' + self._repo)
    on_bug = self._get_on_bug_handler()
    start_time = time.time()
    labels = [repo.get_label(label) for label in self._bug_labels]
    # Ascending 'updated' order keeps the date checkpoints valid: every
    # issue processed is at least as recent as the stored checkpoint.
    issues = repo.get_issues(labels=labels,
                             sort="updated",
                             state="all",
                             direction="asc",
                             since=self._get_checkpoint())
    length = estimate_total_count(issues)
    if length:
        logger.info('Fetching about ' + str(length) +
                    ' GitHub issues from ' + self._remote_url)
    else:
        logger.info('Fetching GitHub issues from ' + self._remote_url)
    count = 0
    for issue in issues:
        # The issues API also returns pull requests; skip them.
        if issue.pull_request:
            continue
        count += 1
        print_progressbar(count, length)
        on_bug(GithubBug(issue))
    logger.info('Importing {} issues took {:.1f} seconds'.format(
        count, time.time() - start_time))
def write(self, bug):
    """Record one processed bug, refresh the progress bar, then forward it."""
    # Count first so the bar already reflects the bug being handled.
    self._processed = self._processed + 1
    print_progressbar(self._processed, self._total)
    self._callback(bug)