Example #1
0
    def set_commit(self, commit):
        """Compute the ``bugfix`` label for ``commit`` and store it in ``self._labels``.

        ``self._labels`` is reset and afterwards holds exactly one tuple
        ``('bugfix', <flag>)``. The flag is set when at least one issue from
        ``commit.fixed_issue_ids`` -- or the parent of such an issue -- has a
        verified type of ``bug`` (case-insensitive) and is accepted by
        ``jira_is_resolved_and_fixed``.
        """

        def _counts_as_fix(candidate):
            # the Jira state is only consulted for issues verified as bugs
            verified = candidate.issue_type_verified
            if not verified or verified.lower() != 'bug':
                return False
            return jira_is_resolved_and_fixed(candidate)

        self._labels = []
        found_fix = False

        linked_ids = commit.fixed_issue_ids
        if linked_ids is not None and len(linked_ids) > 0:
            for linked in Issue.objects(id__in=linked_ids):
                found_fix |= _counts_as_fix(linked)

                # a sub-task also counts when its parent is a fixed bug
                if linked.parent_issue_id:
                    parent = Issue.objects(id=linked.parent_issue_id).get()
                    found_fix |= _counts_as_fix(parent)

        self._labels.append(('bugfix', found_fix))
Example #2
0
def _jira_isbugfix(issue):
    is_fixed_bug = False
    if not issue.issue_type:
        log.warning('could not find issue type for issue %s' % issue.id)
    else:
        if issue.issue_type and issue.issue_type.lower() == 'bug':
            is_fixed_bug = jira_is_resolved_and_fixed(issue)
    return is_fixed_bug
Example #3
0
def _is_jira_featureadd(issue):
    is_added_feature = False
    featureadd_types = set([
        'new feature', 'proposal', 'improvement', 'wish', 'planned work',
        'request'
    ])
    if not issue.issue_type:
        log.warning('could not find issue type for issue %s' % issue.id)
    else:
        if issue.issue_type and issue.issue_type.lower() in featureadd_types:
            is_added_feature = jira_is_resolved_and_fixed(issue)
    return is_added_feature
Example #4
0
    def write_bug_inducing(self,
                           label='validated_bugfix',
                           inducing_strategy='code_only',
                           java_only=True,
                           affected_versions=False,
                           ignore_refactorings=True,
                           name=None,
                           only_validated_bugfix_lines=False):
        """Write bug inducing information into FileAction.

        1. get all commits that are bug-fixing
        2. run blame for all files for all deleted lines in bug-fixing commits to find bug-inducing file actions and commits
        3. save to mongo_db

        :param label: commit label that selects the bug-fixing commits; it also
            decides which issue id list of the commit is used
            (``validated_bugfix``: ``fixed_issue_ids``, ``adjustedszz_bugfix``:
            ``szz_issue_ids``, ``issueonly_bugfix`` and
            ``issuefasttext_bugfix``: ``linked_issue_ids``)
        :param inducing_strategy: passed through to ``self._cg.blame``
        :param java_only: skip files rejected by ``java_filename_filter``
        :param affected_versions: passed through to ``self._find_boundary_date``
        :param ignore_refactorings: if set, the result of
            ``self.refactoring_lines`` is handed to blame as lines to ignore
        :param name: stored as ``label`` of every written ``induces`` entry
        :param only_validated_bugfix_lines: if set, the result of
            ``self.bug_fixing_lines`` is handed to blame
        :raises Exception: if ``label`` is not one of the four known labels
        """
        # depending on our label we restrict the selection to commits that contain linked issues in the respective list
        issue_id_fields = {
            'validated_bugfix': 'fixed_issue_ids',
            'adjustedszz_bugfix': 'szz_issue_ids',
            'issueonly_bugfix': 'linked_issue_ids',
            'issuefasttext_bugfix': 'linked_issue_ids',
        }
        if label not in issue_id_fields:
            raise Exception('unknown label')
        issue_id_field = issue_id_fields[label]

        params = {
            'vcs_system_id': self._vcs_id,
            'labels__{}'.format(label): True,
            # commits with a second parent are excluded
            'parents__1__exists': False,
            '{}__0__exists'.format(issue_id_field): True,
        }

        all_changes = {}

        # fetch before instead of iterate over the cursor because of timeout
        bugfix_commit_ids = [
            c.id
            for c in Commit.objects.filter(**params).only('id').timeout(False)
        ]
        for bugfix_commit_id in bugfix_commit_ids:

            bugfix_commit = Commit.objects.only(
                'revision_hash', 'id', 'fixed_issue_ids', 'szz_issue_ids',
                'linked_issue_ids', 'committer_date').get(id=bugfix_commit_id)
            fixed_issue_ids = getattr(bugfix_commit, issue_id_field)

            # only modified files
            for fa in FileAction.objects.filter(commit_id=bugfix_commit.id,
                                                mode='M').timeout(False):
                f = File.objects.get(id=fa.file_id)

                # only java files
                if java_only and not java_filename_filter(f.path.lower()):
                    continue

                # only issues that are really closed and fixed:
                issues = []
                for issue_id in fixed_issue_ids:
                    try:
                        issue = Issue.objects.get(id=issue_id)
                    except Issue.DoesNotExist:
                        continue

                    # issueonly_bugfix considers linked_issue_ids, those may contain non-bugs
                    if label in ('issueonly_bugfix', 'adjustedszz_bugfix',
                                 'issuefasttext_bugfix') and str(
                                     issue.issue_type).lower() != 'bug':
                        continue

                    if not jira_is_resolved_and_fixed(issue):
                        continue

                    # validated_bugfix relies on the verified issue type instead
                    if label == 'validated_bugfix':
                        verified = issue.issue_type_verified
                        if not verified or verified.lower() != 'bug':
                            continue

                    issues.append(issue)

                if not issues:
                    self._log.warning(
                        'skipping commit {} as none of its issue_ids {} are closed/fixed/resolved'
                        .format(bugfix_commit.revision_hash, fixed_issue_ids))
                    continue

                suspect_boundary_date = self._find_boundary_date(
                    issues, self._version_dates, affected_versions)

                # if ignore refactorings
                ignore_lines = False
                if ignore_refactorings:
                    # get lines where refactorings happened
                    # pass them to the blame call
                    ignore_lines = self.refactoring_lines(
                        bugfix_commit.id, fa.id)

                validated_bugfix_lines = False
                if only_validated_bugfix_lines:
                    validated_bugfix_lines = self.bug_fixing_lines(fa.id)

                # find bug inducing commits, add to our list for this commit and file
                for blame_commit, original_file in self._cg.blame(
                        bugfix_commit.revision_hash, f.path, inducing_strategy,
                        ignore_lines, validated_bugfix_lines):
                    blame_c = Commit.objects.only(
                        'id', 'committer_date',
                        'labels').get(vcs_system_id=self._vcs_id,
                                      revision_hash=blame_commit)

                    # every commit before our suspect boundary date is counted towards inducing
                    if blame_c.committer_date < suspect_boundary_date:
                        szz_type = 'inducing'

                    # every commit at or after our boundary date is counted towards suspects
                    else:
                        szz_type = 'suspect'

                        # if the suspect commit is also a bug-fix it is a partial fix
                        if blame_c.labels.get(label) is True:
                            szz_type = 'partial_fix'

                    self._log.debug(
                        'blame commit date {} against boundary date {}, szz_type {}'
                        .format(blame_c.committer_date, suspect_boundary_date,
                                szz_type))
                    for blame_fa in FileAction.objects.filter(
                            commit_id=blame_c.id).timeout(False):
                        blame_f = File.objects.get(id=blame_fa.file_id)

                        if blame_f.path != original_file:
                            continue

                        # the first szz_type seen for a (fix, inducing) pair wins
                        key = str(fa.id) + '_' + str(blame_fa.id)
                        if key not in all_changes:
                            all_changes[key] = {
                                'change_file_action_id': fa.id,
                                'inducing_file_action': blame_fa.id,
                                'label': label,
                                'szz_type': szz_type,
                                'inducing_strategy': inducing_strategy
                            }

        self._log.info('size of all changes: %s mb',
                       asizeof.asizeof(all_changes) / 1024 / 1024)

        # second run: differentiate between hard and weak suspects
        self._log.debug(
            'starting second pass for distinguish hard and weak suspects')

        # inducing file actions that appear at least once with a type other
        # than suspect (partial_fix or inducing); collected up front so each
        # suspect needs one set lookup instead of a scan over all changes
        non_suspect_inducers = {
            values['inducing_file_action']
            for values in all_changes.values()
            if values['szz_type'] != 'suspect'
        }

        new_types = {}
        for change, values in all_changes.items():
            if values['szz_type'] != 'suspect':
                continue

            # every suspect starts as hard_suspect; it is a weak_suspect when
            # another change with the same inducing file action is not a suspect
            if values['inducing_file_action'] in non_suspect_inducers:
                self._log.debug(
                    'found another inducing change for this inducing change which is not a suspect, we set szz_type to weak_suspect'
                )
                new_types[change] = 'weak_suspect'
            else:
                new_types[change] = 'hard_suspect'

        # write results
        self._log.debug('writing results')
        for change, values in all_changes.items():
            fa = FileAction.objects.get(id=values['inducing_file_action'])

            szz_type = values['szz_type']
            if szz_type == 'suspect':
                szz_type = new_types[change]

            # label, inducing_strategy, java_only and affected_versions are
            # not stored individually, they are defined by the name
            to_write = {
                'change_file_action_id': values['change_file_action_id'],
                'szz_type': szz_type,
                'label': name
            }

            self._log.debug(to_write)

            # entries from earlier runs (e.g. with a different label or
            # strategy) are kept; only exact duplicates are not added again
            if to_write not in fa.induces:
                fa.induces.append(to_write)
            fa.save()
Example #5
0
    def issues(self):
        """Load inducing file actions for labeling files accordingly.

        For all bug-fixing commits that happened after our chosen release:
        find bug-inducing commits via the ``induces`` entries of FileAction.

        - uses commit label: validated_bugfix
        - uses issues from: fixed_issue_ids, restricted to issues whose
          verified type is "bug" and that pass ``jira_is_resolved_and_fixed``
        - uses inducing label: JLMIV+R, ignoring entries of type hard_suspect

        ALL inducing commits of ALL bug fixing commits MUST have a path to the release.
        The only exceptions are partial fixes when the inducing commit without a path to the release is a bug fixing commit for the same issue.

        Returns a dict mapping a release file (looked up in ``self._aliases``)
        to a list of tuples (issue external id, bugfix commit date, bugfix
        revision hash, priority, verified issue type, issue creation date).
        """
        buginducing_commits = {}
        skipped_issues = set()
        all_fixed_issues = set()

        fixing_commits = Commit.objects.filter(
            vcs_system_id=self._vcs.id,
            committer_date__gt=self._release_date,
            labels__validated_bugfix=True,
            fixed_issue_ids__0__exists=True).only(
                'id', 'committer_date', 'fixed_issue_ids',
                'revision_hash').timeout(False)
        for commit in fixing_commits:
            for issue in Issue.objects.filter(id__in=commit.fixed_issue_ids):
                verified = issue.issue_type_verified
                if not verified or verified.lower() != "bug":
                    continue
                if jira_is_resolved_and_fixed(issue):
                    all_fixed_issues.add(issue)

        for issue in all_fixed_issues:
            inducings_have_path = True
            blame_commits = []

            commits_for_issue = Commit.objects.filter(
                vcs_system_id=self._vcs.id,
                fixed_issue_ids=issue.id,
                committer_date__gt=self._release_date).only(
                    'revision_hash', 'id', 'fixed_issue_ids',
                    'committer_date').timeout(False)
            for bugfix_commit in commits_for_issue:

                for fa in FileAction.objects.filter(commit_id=bugfix_commit.id,
                                                    mode='M'):

                    # load bug_inducing FileActions
                    inducing_actions = FileAction.objects.filter(
                        induces__match={
                            'change_file_action_id': fa.id,
                            'label': 'JLMIV+R'
                        })
                    for ifa in inducing_actions:

                        # still need to fetch the correct one
                        for ind in ifa.induces:
                            if ind['change_file_action_id'] != fa.id:
                                continue
                            if ind['label'] != 'JLMIV+R':
                                continue
                            if ind['szz_type'] == 'hard_suspect':
                                continue

                            bc = Commit.objects.get(id=ifa.commit_id)
                            blame_commit = bc.revision_hash
                            blame_file = File.objects.get(id=ifa.file_id).path

                            blame_id = '{}_{}'.format(blame_commit,
                                                      issue.external_id)
                            blame_commits.append(blame_id)

                            # if this inducing commit has no path to our release we skip it altogether
                            if not nx.has_path(self._graph, blame_commit,
                                               self._target_release_hash):
                                # tolerated only when the inducing commit itself fixes the same issue
                                fixes_same_issue = (
                                    bc.fixed_issue_ids is not None
                                    and issue.id in bc.fixed_issue_ids)
                                if not fixes_same_issue:
                                    inducings_have_path = False
                                    self._log.debug(
                                        '[{}] has no path to release, skipping issue: {}'
                                        .format(blame_commit,
                                                issue.external_id))
                                continue

                            # skip if we are not interested in the blame_file (because it does not point to a release file)
                            if blame_file not in self._aliases.keys():
                                if java_filename_filter(blame_file,
                                                        production_only=True):
                                    self._log.debug(
                                        '[{}] {} not in release files or aliases {}, skipping issues: {}'
                                        .format(blame_commit, blame_file,
                                                self._aliases.keys(),
                                                issue.external_id))
                                skipped_issues.add(issue.external_id)
                                continue

                            record = (issue.external_id,
                                      str(bugfix_commit.committer_date),
                                      bugfix_commit.revision_hash,
                                      str(issue.priority).lower(),
                                      str(issue.issue_type_verified).lower(),
                                      str(issue.created_at))
                            per_file = buginducing_commits.setdefault(
                                blame_id, {})
                            per_file.setdefault(blame_file, []).append(record)

            # not every blame commit has a path to the release
            # we need to remove all of them in this case
            if inducings_have_path:
                continue

            skipped_issues.add(issue.external_id)
            for blame_id in blame_commits:
                # may not be present because its a hard_suspect
                if blame_id not in buginducing_commits:
                    continue
                id_parts = blame_id.split('_')
                self._log.debug(
                    '[{}] found inducing commits wihtout path, deleting issue {}'
                    .format(id_parts[0], id_parts[1]))
                del buginducing_commits[blame_id]

        # regroup by release file, dropping duplicate entries
        ret_issues = {}
        for per_file in buginducing_commits.values():
            for blame_path, records in per_file.items():
                release_file = self._aliases[blame_path]
                collected = ret_issues.setdefault(release_file, [])
                for record in records:
                    if record in collected:
                        continue
                    collected.append(record)
                    self._log.debug('adding issue {} to file {}'.format(
                        record[0], release_file))
                    # an issue that ends up in the result is no longer skipped
                    skipped_issues.discard(record[0])
        return ret_issues
Example #6
0
    def issues_six_months_szzr(self):
        """basically looks six months into the future from the release and counts the defects that we can match

        Bug-fixing commits are those labelled ``issueonly_bugfix`` with a
        committer date after the release date and before release date plus six
        months. Only linked issues whose type is "bug" and that pass
        ``jira_is_resolved_and_fixed`` are used. Of each commit only modified
        files are considered that are accepted by ``java_filename_filter`` and
        for which at least one ``induces`` entry with label ``JL+R`` exists.

        returns a dict, {filename: [list of issues]}; every release file is a
        key and each issue is a tuple (external id, bugfix commit date,
        bugfix revision hash, priority, issue type, issue creation date)
        """

        files_release = self._release_files

        commit_graph = get_commit_graph(self._vcs.id)
        undirected_graph = commit_graph.to_undirected(as_view=True)
        rename_cache = {}

        all_fixed_issues = set()
        six_months = self._release_date + relativedelta(months=6)
        for commit in Commit.objects.filter(
                vcs_system_id=self._vcs.id,
                committer_date__gt=self._release_date,
                committer_date__lt=six_months,
                labels__issueonly_bugfix=True,
                linked_issue_ids__0__exists=True).only(
                    'id', 'committer_date', 'linked_issue_ids',
                    'revision_hash').timeout(False):
            for issue in Issue.objects.filter(id__in=commit.linked_issue_ids):
                if str(issue.issue_type).lower(
                ) == "bug" and jira_is_resolved_and_fixed(issue):
                    all_fixed_issues.add(issue)

        ret = {rfile: [] for rfile in files_release}

        for issue in all_fixed_issues:
            for bugfix_commit in Commit.objects.filter(
                    vcs_system_id=self._vcs.id,
                    linked_issue_ids=issue.id,
                    committer_date__gt=self._release_date,
                    labels__issueonly_bugfix=True,
                    committer_date__lt=six_months).only(
                        'revision_hash', 'id', 'linked_issue_ids',
                        'committer_date').timeout(False):

                changed_files = set()
                for fa in FileAction.objects.filter(commit_id=bugfix_commit.id,
                                                    mode='M'):

                    # check if we find at least one inducing to this fa
                    inducing_count = FileAction.objects.filter(
                        induces__match={
                            'change_file_action_id': fa.id,
                            'label': 'JL+R'
                        }).count()
                    if inducing_count == 0:
                        continue

                    changed = File.objects.get(id=fa.file_id)
                    if changed.path not in changed_files and java_filename_filter(
                            changed.path):
                        changed_files.add(changed.path)

                if not changed_files:
                    continue

                current_files, path_valid = self.calc_current_files(
                    bugfix_commit, self._release_commit, commit_graph,
                    undirected_graph, rename_cache, changed_files)

                if not path_valid or not current_files.intersection(
                        files_release):
                    continue

                # identical for every file of this commit, so build it once
                inf = (issue.external_id, str(bugfix_commit.committer_date),
                       bugfix_commit.revision_hash,
                       str(issue.priority).lower(),
                       str(issue.issue_type).lower(), str(issue.created_at))

                for release_path in current_files:
                    if release_path in ret and inf not in ret[release_path]:
                        ret[release_path].append(inf)

        # todo
        # for all files changed in a bugfixing commit find the corresponding names of the file in the release and count those
        # bugs toward the file name
        return ret
Example #7
0
    def issues_six_months_szz(self):
        """basically looks six months into the future from the release and counts the defects that we can match

        Bug-fixing commits are those labelled ``adjustedszz_bugfix`` with a
        committer date after the release date and before release date plus six
        months. Only issues from ``szz_issue_ids`` whose type is "bug" and
        that pass ``jira_is_resolved_and_fixed`` are used. Of each commit the
        modified files accepted by ``java_filename_filter`` are considered.

        returns a dict, {filename: [list of issues]}; every release file is a
        key and each issue is a tuple (external id, bugfix commit date,
        bugfix revision hash, priority, issue type, issue creation date)
        """

        files_release = self._release_files

        commit_graph = get_commit_graph(self._vcs.id)
        undirected_graph = commit_graph.to_undirected(as_view=True)
        rename_cache = {}

        all_fixed_issues = set()
        six_months = self._release_date + relativedelta(months=6)
        for commit in Commit.objects.filter(
                vcs_system_id=self._vcs.id,
                committer_date__gt=self._release_date,
                committer_date__lt=six_months,
                labels__adjustedszz_bugfix=True,
                szz_issue_ids__0__exists=True).only(
                    'id', 'committer_date', 'szz_issue_ids',
                    'revision_hash').timeout(False):
            for issue in Issue.objects.filter(id__in=commit.szz_issue_ids):
                if str(issue.issue_type).lower(
                ) == "bug" and jira_is_resolved_and_fixed(issue):
                    all_fixed_issues.add(issue)

        ret = {rfile: [] for rfile in files_release}

        for issue in all_fixed_issues:
            for bugfix_commit in Commit.objects.filter(
                    vcs_system_id=self._vcs.id,
                    labels__adjustedszz_bugfix=True,
                    szz_issue_ids=issue.id,
                    committer_date__gt=self._release_date,
                    committer_date__lt=six_months).only(
                        'revision_hash', 'id', 'szz_issue_ids',
                        'committer_date').timeout(False):

                # in comparison to issues_six_months_szzr() we skip the inducing step and just use every bugfix commit within 6 months window
                changed_files = set()
                for fa in FileAction.objects.filter(commit_id=bugfix_commit.id,
                                                    mode='M'):

                    changed = File.objects.get(id=fa.file_id)
                    if changed.path not in changed_files and java_filename_filter(
                            changed.path):
                        changed_files.add(changed.path)

                # NOTE(review): unlike issues_six_months_szzr() this is also
                # called when changed_files is empty -- confirm that is intended
                current_files, path_valid = self.calc_current_files(
                    bugfix_commit, self._release_commit, commit_graph,
                    undirected_graph, rename_cache, changed_files)

                if not path_valid or not current_files.intersection(
                        files_release):
                    continue

                # identical for every file of this commit, so build it once
                inf = (issue.external_id, str(bugfix_commit.committer_date),
                       bugfix_commit.revision_hash,
                       str(issue.priority).lower(),
                       str(issue.issue_type).lower(), str(issue.created_at))

                for release_path in current_files:
                    if release_path in ret and inf not in ret[release_path]:
                        ret[release_path].append(inf)

        # todo
        # for all files changed in a bugfixing commit find the corresponding names of the file in the release and count those
        # bugs toward the file name
        return ret