def set_commit(self, commit):
    """Label ``commit`` as a bugfix based on its validated issue links.

    ``self._labels`` is reset and receives exactly one ``('bugfix', flag)``
    tuple.  The flag turns true when at least one issue referenced by
    ``commit.fixed_issue_ids`` -- or the parent of such an issue -- has a
    verified issue type of ``bug`` and passes ``jira_is_resolved_and_fixed``.
    """

    def _counts_as_fixed_bug(candidate):
        # Only issues whose verified type is "bug" reach the Jira state check.
        verified_type = candidate.issue_type_verified
        if not verified_type or verified_type.lower() != 'bug':
            return False
        return jira_is_resolved_and_fixed(candidate)

    self._labels = []
    found_bugfix = False
    fixed_ids = commit.fixed_issue_ids

    if fixed_ids is not None and len(fixed_ids) > 0:
        for linked in Issue.objects(id__in=fixed_ids):
            found_bugfix |= _counts_as_fixed_bug(linked)

            # A linked issue may hang below a parent issue; the parent is
            # judged by the same rule.
            if linked.parent_issue_id:
                parent = Issue.objects(id=linked.parent_issue_id).get()
                found_bugfix |= _counts_as_fixed_bug(parent)

    self._labels.append(('bugfix', found_bugfix))
def _jira_isbugfix(issue): is_fixed_bug = False if not issue.issue_type: log.warning('could not find issue type for issue %s' % issue.id) else: if issue.issue_type and issue.issue_type.lower() == 'bug': is_fixed_bug = jira_is_resolved_and_fixed(issue) return is_fixed_bug
def _is_jira_featureadd(issue): is_added_feature = False featureadd_types = set([ 'new feature', 'proposal', 'improvement', 'wish', 'planned work', 'request' ]) if not issue.issue_type: log.warning('could not find issue type for issue %s' % issue.id) else: if issue.issue_type and issue.issue_type.lower() in featureadd_types: is_added_feature = jira_is_resolved_and_fixed(issue) return is_added_feature
def write_bug_inducing(self,
                       label='validated_bugfix',
                       inducing_strategy='code_only',
                       java_only=True,
                       affected_versions=False,
                       ignore_refactorings=True,
                       name=None,
                       only_validated_bugfix_lines=False):
    """Write bug inducing information into FileAction.

    1. get all commits that are bug-fixing
    2. run blame for all files for all deleted lines in bug-fixing commits
       to find bug-inducing file actions and commits
    3. save to mongo_db

    :param label: commit label selecting the bug-fixing commits; it also
        decides which issue-id list of the commit is used
        (``validated_bugfix`` -> ``fixed_issue_ids``, ``adjustedszz_bugfix``
        -> ``szz_issue_ids``, ``issueonly_bugfix`` and
        ``issuefasttext_bugfix`` -> ``linked_issue_ids``).
    :param inducing_strategy: passed through to ``self._cg.blame``.
    :param java_only: skip files rejected by ``java_filename_filter``.
    :param affected_versions: passed through to ``self._find_boundary_date``.
    :param ignore_refactorings: pass the result of
        ``self.refactoring_lines`` to blame instead of ``False``.
    :param name: value stored as ``label`` in every written ``induces``
        entry.
    :param only_validated_bugfix_lines: pass the result of
        ``self.bug_fixing_lines`` to blame instead of ``False``.
    :raises ValueError: if ``label`` is not one of the supported labels.
    """
    # Commit field that carries the issue ids for each supported label.
    issue_id_fields = {
        'validated_bugfix': 'fixed_issue_ids',
        'adjustedszz_bugfix': 'szz_issue_ids',
        'issueonly_bugfix': 'linked_issue_ids',
        'issuefasttext_bugfix': 'linked_issue_ids',
    }

    params = {
        'vcs_system_id': self._vcs_id,
        'labels__{}'.format(label): True,
        # no second parent allowed
        'parents__1__exists': False,
    }

    if label not in issue_id_fields:
        raise ValueError('unknown label')
    issue_id_field = issue_id_fields[label]

    # depending on our label we restrict the selection to commits that
    # contain linked issues in the respective list
    params['{}__0__exists'.format(issue_id_field)] = True

    all_changes = {}

    # fetch the ids up front instead of iterating over the cursor because of
    # cursor timeouts
    bugfix_commit_ids = [
        c.id
        for c in Commit.objects.filter(**params).only('id').timeout(False)
    ]

    for bugfix_commit_id in bugfix_commit_ids:
        bugfix_commit = Commit.objects.only(
            'revision_hash', 'id', 'fixed_issue_ids', 'szz_issue_ids',
            'linked_issue_ids', 'committer_date').get(id=bugfix_commit_id)
        fixed_issue_ids = getattr(bugfix_commit, issue_id_field)

        # only modified files
        for fa in FileAction.objects.filter(commit_id=bugfix_commit.id,
                                            mode='M').timeout(False):
            f = File.objects.get(id=fa.file_id)

            # only java files
            if java_only and not java_filename_filter(f.path.lower()):
                continue

            # only issues that are really closed and fixed
            issues = []
            for issue_id in fixed_issue_ids:
                try:
                    issue = Issue.objects.get(id=issue_id)
                except Issue.DoesNotExist:
                    continue

                # every label except validated_bugfix relies on unverified
                # issue links, those may contain non-bugs
                if (label != 'validated_bugfix'
                        and str(issue.issue_type).lower() != 'bug'):
                    continue

                if not jira_is_resolved_and_fixed(issue):
                    continue

                if label == 'validated_bugfix':
                    verified_type = issue.issue_type_verified
                    if not verified_type or verified_type.lower() != 'bug':
                        continue

                issues.append(issue)

            if not issues:
                self._log.warning(
                    'skipping commit {} as none of its issue_ids {} are closed/fixed/resolved'
                    .format(bugfix_commit.revision_hash, fixed_issue_ids))
                continue

            suspect_boundary_date = self._find_boundary_date(
                issues, self._version_dates, affected_versions)

            # lines touched by refactorings are handed to blame so it can
            # ignore them; False disables the filter
            ignore_lines = False
            if ignore_refactorings:
                ignore_lines = self.refactoring_lines(bugfix_commit.id, fa.id)

            validated_bugfix_lines = False
            if only_validated_bugfix_lines:
                validated_bugfix_lines = self.bug_fixing_lines(fa.id)

            # find bug inducing commits, add to our list for this commit and
            # file
            for blame_commit, original_file in self._cg.blame(
                    bugfix_commit.revision_hash, f.path, inducing_strategy,
                    ignore_lines, validated_bugfix_lines):
                blame_c = Commit.objects.only(
                    'id', 'committer_date',
                    'labels').get(vcs_system_id=self._vcs_id,
                                  revision_hash=blame_commit)

                if blame_c.committer_date < suspect_boundary_date:
                    # every commit before our suspect boundary date is
                    # counted towards inducing
                    szz_type = 'inducing'
                else:
                    # every commit on or behind our boundary date is counted
                    # towards suspects
                    szz_type = 'suspect'

                    # if the suspect commit is also a bug-fix it is a
                    # partial fix
                    if blame_c.labels.get(label) is True:
                        szz_type = 'partial_fix'

                self._log.debug(
                    'blame commit date {} against boundary date {}, szz_type {}'
                    .format(blame_c.committer_date, suspect_boundary_date,
                            szz_type))

                for blame_fa in FileAction.objects.filter(
                        commit_id=blame_c.id).timeout(False):
                    blame_f = File.objects.get(id=blame_fa.file_id)
                    if blame_f.path != original_file:
                        continue

                    # first hit wins for every (fixing, inducing) pair
                    key = str(fa.id) + '_' + str(blame_fa.id)
                    if key not in all_changes:
                        all_changes[key] = {
                            'change_file_action_id': fa.id,
                            'inducing_file_action': blame_fa.id,
                            'label': label,
                            'szz_type': szz_type,
                            'inducing_strategy': inducing_strategy,
                        }

    self._log.info('size of all changes: %s mb',
                   asizeof.asizeof(all_changes) / 1024 / 1024)

    # second run: differentiate between hard and weak suspects
    self._log.debug(
        'starting second pass for distinguish hard and weak suspects')

    # inducing file actions that are blamed at least once as something other
    # than a suspect (i.e. inducing or partial_fix)
    confirmed_file_actions = {
        values['inducing_file_action']
        for values in all_changes.values()
        if values['szz_type'] != 'suspect'
    }

    new_types = {}
    for change, values in all_changes.items():
        if values['szz_type'] != 'suspect':
            continue

        # every suspect starts as hard_suspect; it is downgraded to
        # weak_suspect when the same inducing file action is also blamed by
        # a change that is not a suspect
        if values['inducing_file_action'] in confirmed_file_actions:
            new_types[change] = 'weak_suspect'
            self._log.debug(
                'found another inducing change for this inducing change which is not a suspect, we set szz_type to weak_suspect'
            )
        else:
            new_types[change] = 'hard_suspect'

    # write results
    self._log.debug('writing results')
    for change, values in all_changes.items():
        fa = FileAction.objects.get(id=values['inducing_file_action'])

        szz_type = values['szz_type']
        if szz_type == 'suspect':
            szz_type = new_types[change]

        # commit label, strategy and filters are not stored separately, they
        # are defined by the name
        to_write = {
            'change_file_action_id': values['change_file_action_id'],
            'szz_type': szz_type,
            'label': name,
        }
        self._log.debug(to_write)

        # existing entries (also from runs with another name) are kept; only
        # exact duplicates are skipped
        if to_write not in fa.induces:
            fa.induces.append(to_write)
            fa.save()
def issues(self):
    """Load inducing file actions for labeling files accordingly.

    For all bug-fixing commits that happened after our chosen release: find
    bug-inducing commits via induced of FileAction

    - uses commit label: validated_bugfix
    - uses issues from: fixed_issue_ids (manually validated links from
      commit to issue)
    - uses inducing label: JLMIV+ (Jira Links Manual(JLM), Issue
      Validation(IV), only java files(+), skip comments and empty spaces in
      blame(+))

    ALL inducing commits of ALL bug fixing commits MUST have a path to the
    release. The only exceptions are partial fixes when the inducing commit
    without a path to the release is a bug fixing commit for the same issue.

    Returns a dict that maps a release file name (the value stored in
    ``self._aliases`` for the blamed path) to a list of tuples
    ``(issue external id, bugfix committer date, bugfix revision hash,
    lower-cased priority, lower-cased verified issue type, issue creation
    date)``; the dates are converted with ``str()``.
    """
    # blame_id ('<inducing revision hash>_<issue external id>')
    #   -> blamed file path -> list of issue tuples
    buginducing_commits = {}
    # only maintained locally, it is not part of the return value
    skipped_issues = set()
    all_fixed_issues = set()

    # first pass: collect every verified, resolved bug that is fixed by a
    # validated bugfix commit after the release date
    for commit in Commit.objects.filter(
            vcs_system_id=self._vcs.id,
            committer_date__gt=self._release_date,
            labels__validated_bugfix=True,
            fixed_issue_ids__0__exists=True).only(
                'id', 'committer_date', 'fixed_issue_ids',
                'revision_hash').timeout(False):
        for issue in Issue.objects.filter(id__in=commit.fixed_issue_ids):
            if issue.issue_type_verified and issue.issue_type_verified.lower() == "bug" and jira_is_resolved_and_fixed(issue):
                all_fixed_issues.add(issue)

    # second pass: per issue, walk bugfix commit -> modified file action ->
    # inducing file action
    for issue in all_fixed_issues:
        inducings_have_path = True
        # every blame_id seen for this issue, needed for the cleanup below
        blame_commits = []
        for bugfix_commit in Commit.objects.filter(
                vcs_system_id=self._vcs.id,
                fixed_issue_ids=issue.id,
                committer_date__gt=self._release_date).only(
                    'revision_hash', 'id', 'fixed_issue_ids',
                    'committer_date').timeout(False):
            for fa in FileAction.objects.filter(commit_id=bugfix_commit.id, mode='M'):

                # load bug_inducing FileActions
                for ifa in FileAction.objects.filter(
                        induces__match={
                            'change_file_action_id': fa.id,
                            'label': 'JLMIV+R'
                        }):

                    # still need to fetch the correct one
                    for ind in ifa.induces:
                        if ind['change_file_action_id'] == fa.id and ind['label'] == 'JLMIV+R' and ind['szz_type'] != 'hard_suspect':
                            bc = Commit.objects.get(id=ifa.commit_id)
                            blame_commit = bc.revision_hash
                            blame_file = File.objects.get(id=ifa.file_id).path
                            blame_id = '{}_{}'.format(blame_commit, issue.external_id)
                            blame_commits.append(blame_id)

                            # if this inducing commit has no path to our release we skip it altogether
                            # NOTE(review): the else below is assumed to pair
                            # with this has_path check (not with the inner
                            # fixed_issue_ids check) -- confirm against the
                            # original formatting
                            if not nx.has_path(self._graph, blame_commit, self._target_release_hash):
                                # an inducing commit that itself fixes the
                                # same issue does not invalidate the issue
                                if bc.fixed_issue_ids is None or issue.id not in bc.fixed_issue_ids:
                                    inducings_have_path = False
                                    self._log.debug(
                                        '[{}] has no path to release, skipping issue: {}'
                                        .format(blame_commit, issue.external_id))
                            else:
                                # skip if we are not interested in the blame_file (because it does not point to a release file)
                                if blame_file not in self._aliases.keys():
                                    # the issue is only recorded as skipped
                                    # when java_filename_filter accepts the
                                    # file with production_only=True
                                    if java_filename_filter(blame_file, production_only=True):
                                        self._log.debug(
                                            '[{}] {} not in release files or aliases {}, skipping issues: {}'
                                            .format(blame_commit, blame_file, self._aliases.keys(), issue.external_id))
                                        skipped_issues.add(issue.external_id)
                                    # NOTE(review): continue is assumed to
                                    # apply to every unknown file, the later
                                    # self._aliases[file] lookup relies on it
                                    continue

                                if blame_id not in buginducing_commits.keys():
                                    buginducing_commits[blame_id] = {}

                                if blame_file not in buginducing_commits[blame_id].keys():
                                    buginducing_commits[blame_id][blame_file] = []

                                buginducing_commits[blame_id][blame_file].append(
                                    (issue.external_id,
                                     str(bugfix_commit.committer_date),
                                     bugfix_commit.revision_hash,
                                     str(issue.priority).lower(),
                                     str(issue.issue_type_verified).lower(),
                                     str(issue.created_at)))

        # not every blame commit has a path to the release
        # we need to remove all of them in this case
        if not inducings_have_path:
            skipped_issues.add(issue.external_id)
            for blame_id in blame_commits:
                if blame_id in buginducing_commits.keys():  # may not be present because its a hard_suspect
                    self._log.debug(
                        '[{}] found inducing commits wihtout path, deleting issue {}'
                        .format(blame_id.split('_')[0], blame_id.split('_')[1]))
                    del buginducing_commits[blame_id]

    # regroup by release file name and drop duplicate issue tuples
    ret_issues = {}
    for blame_commit, values in buginducing_commits.items():
        for file, issues in values.items():
            release_file = self._aliases[file]
            if release_file not in ret_issues.keys():
                ret_issues[release_file] = []
            for i in issues:
                if i not in ret_issues[release_file]:
                    ret_issues[release_file].append(i)
                    self._log.debug('adding issue {} to file {}'.format(i[0], release_file))
                # an issue that made it into the result is not skipped
                if i[0] in skipped_issues:
                    skipped_issues.remove(i[0])
    return ret_issues
def issues_six_months_szzr(self):
    """Collect the defects fixed within six months after the release.

    Looks six months into the future from the release and counts the defects
    that can be matched to release files.  Only bug-fixing commits
    (``issueonly_bugfix``) whose modified file actions have at least one
    inducing file action with the label ``JL+R`` are considered.

    Returns a dict ``{filename: [list of issues]}`` with one key per entry of
    ``self._release_files``.  Every issue is a tuple ``(issue external id,
    bugfix committer date, bugfix revision hash, lower-cased priority,
    lower-cased issue type, issue creation date)``; the dates are converted
    with ``str()``.
    """
    files_release = self._release_files
    commit_graph = get_commit_graph(self._vcs.id)
    undirected_graph = commit_graph.to_undirected(as_view=True)
    rename_cache = {}

    six_months = self._release_date + relativedelta(months=6)

    # all resolved bugs linked from bug-fixing commits inside the window
    all_fixed_issues = set()
    for commit in Commit.objects.filter(
            vcs_system_id=self._vcs.id,
            committer_date__gt=self._release_date,
            committer_date__lt=six_months,
            labels__issueonly_bugfix=True,
            linked_issue_ids__0__exists=True).only(
                'id', 'committer_date', 'linked_issue_ids',
                'revision_hash').timeout(False):
        for issue in Issue.objects.filter(id__in=commit.linked_issue_ids):
            if (str(issue.issue_type).lower() == "bug"
                    and jira_is_resolved_and_fixed(issue)):
                all_fixed_issues.add(issue)

    ret = {rfile: [] for rfile in files_release}
    for issue in all_fixed_issues:
        for bugfix_commit in Commit.objects.filter(
                vcs_system_id=self._vcs.id,
                linked_issue_ids=issue.id,
                committer_date__gt=self._release_date,
                labels__issueonly_bugfix=True,
                committer_date__lt=six_months).only(
                    'revision_hash', 'id', 'linked_issue_ids',
                    'committer_date').timeout(False):

            # java files modified by this commit for which at least one
            # inducing file action with our label exists
            changed_files = set()
            for fa in FileAction.objects.filter(commit_id=bugfix_commit.id,
                                                mode='M'):
                has_inducing = FileAction.objects.filter(
                    induces__match={
                        'change_file_action_id': fa.id,
                        'label': 'JL+R'
                    }).count() > 0
                if not has_inducing:
                    continue

                f = File.objects.get(id=fa.file_id)
                if f.path not in changed_files and java_filename_filter(
                        f.path):
                    changed_files.add(f.path)

            if not changed_files:
                continue

            current_files, path_valid = self.calc_current_files(
                bugfix_commit, self._release_commit, commit_graph,
                undirected_graph, rename_cache, changed_files)
            if not path_valid:
                continue

            # every release file is a key of ret, so the intersection is
            # exactly the set of result lists that have to be extended
            matched_files = current_files.intersection(files_release)
            if not matched_files:
                continue

            # identical for every file of this commit/issue pair
            inf = (issue.external_id,
                   str(bugfix_commit.committer_date),
                   bugfix_commit.revision_hash,
                   str(issue.priority).lower(),
                   str(issue.issue_type).lower(),
                   str(issue.created_at))
            for release_file in matched_files:
                if inf not in ret[release_file]:
                    ret[release_file].append(inf)

    # TODO: for all files changed in a bugfixing commit find the
    # corresponding names of the file in the release and count those bugs
    # toward the file name
    return ret
def issues_six_months_szz(self):
    """Collect the defects fixed within six months after the release.

    Looks six months into the future from the release and counts the defects
    that can be matched to release files.  In comparison to
    ``issues_six_months_szzr()`` the inducing step is skipped: every
    bug-fixing commit (``adjustedszz_bugfix``) within the six months window
    is used.

    Returns a dict ``{filename: [list of issues]}`` with one key per entry of
    ``self._release_files``.  Every issue is a tuple ``(issue external id,
    bugfix committer date, bugfix revision hash, lower-cased priority,
    lower-cased issue type, issue creation date)``; the dates are converted
    with ``str()``.
    """
    files_release = self._release_files
    commit_graph = get_commit_graph(self._vcs.id)
    undirected_graph = commit_graph.to_undirected(as_view=True)
    rename_cache = {}

    six_months = self._release_date + relativedelta(months=6)

    # all resolved bugs referenced by bug-fixing commits inside the window
    all_fixed_issues = set()
    for commit in Commit.objects.filter(
            vcs_system_id=self._vcs.id,
            committer_date__gt=self._release_date,
            committer_date__lt=six_months,
            labels__adjustedszz_bugfix=True,
            szz_issue_ids__0__exists=True).only(
                'id', 'committer_date', 'szz_issue_ids',
                'revision_hash').timeout(False):
        for issue in Issue.objects.filter(id__in=commit.szz_issue_ids):
            if (str(issue.issue_type).lower() == "bug"
                    and jira_is_resolved_and_fixed(issue)):
                all_fixed_issues.add(issue)

    ret = {rfile: [] for rfile in files_release}
    for issue in all_fixed_issues:
        for bugfix_commit in Commit.objects.filter(
                vcs_system_id=self._vcs.id,
                labels__adjustedszz_bugfix=True,
                szz_issue_ids=issue.id,
                committer_date__gt=self._release_date,
                committer_date__lt=six_months).only(
                    'revision_hash', 'id', 'szz_issue_ids',
                    'committer_date').timeout(False):

            # java files modified by this commit
            changed_files = set()
            for fa in FileAction.objects.filter(commit_id=bugfix_commit.id,
                                                mode='M'):
                f = File.objects.get(id=fa.file_id)
                if f.path not in changed_files and java_filename_filter(
                        f.path):
                    changed_files.add(f.path)

            # called for every commit, also when no java file was modified
            current_files, path_valid = self.calc_current_files(
                bugfix_commit, self._release_commit, commit_graph,
                undirected_graph, rename_cache, changed_files)
            if not path_valid:
                continue

            # every release file is a key of ret, so the intersection is
            # exactly the set of result lists that have to be extended
            matched_files = current_files.intersection(files_release)
            if not matched_files:
                continue

            # identical for every file of this commit/issue pair
            inf = (issue.external_id,
                   str(bugfix_commit.committer_date),
                   bugfix_commit.revision_hash,
                   str(issue.priority).lower(),
                   str(issue.issue_type).lower(),
                   str(issue.created_at))
            for release_file in matched_files:
                if inf not in ret[release_file]:
                    ret[release_file].append(inf)

    # TODO: for all files changed in a bugfixing commit find the
    # corresponding names of the file in the release and count those bugs
    # toward the file name
    return ret