Example #1
def test_extract_edits_1(git_repo_dir):
    commit_hash = 'b17c2c321ce8d299de3d063ca0a1b0b363477505'
    filename = 'first_lines.txt'

    extraction_settings = {
        'use_blocks': False,
        'blame_options':
        ['-C', '-C', '-C4', '--show-number', '--line-porcelain'],
        'extract_complexity': True,
        'extract_text': True
    }
    git_repo = pydriller.Git(git_repo_dir)
    commit = git_repo.get_commit(commit_hash)
    df = None
    for mod in commit.modified_files:
        if mod.filename == filename:
            df = git2net.extraction._extract_edits(git_repo, commit, mod,
                                                   extraction_settings)

    assert len(df) == 3
    assert df.at[
        0,
        'original_commit_addition'] == 'e4448e87541d19d139b9d033b2578941a53d1f97'
    assert df.at[
        1,
        'original_commit_addition'] == '6b531fcb57d5b9d98dd983cb65357d82ccca647b'
    assert df.at[
        2,
        'original_commit_addition'] is None  # no match due to line-ending change
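The blame_options above are ordinary git blame flags: repeating -C widens the search for copied lines across files and commits, -C4 sets the copy-detection threshold, --show-number prints each line's original line number, and --line-porcelain emits the per-line metadata that git2net parses. A minimal sketch of inspecting that output directly, assuming the test repository sits at a placeholder path:

import subprocess

# Minimal sketch; '/path/to/test/repo' is an assumed placeholder for the
# repository provided by the test fixture.
blame = subprocess.run(
    ['git', 'blame', '-C', '-C', '-C4', '--show-number', '--line-porcelain',
     'b17c2c321ce8d299de3d063ca0a1b0b363477505', '--', 'first_lines.txt'],
    cwd='/path/to/test/repo',
    capture_output=True, text=True, check=True,
).stdout
print(blame.splitlines()[:5])  # first few porcelain header lines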
Example #2
def _create_corresponding_bug(
        closing_commit: pygit2.Commit,
        project_repo: pygit2.Repository,
        issue_id: tp.Optional[int] = None,
        creation_date: tp.Optional[datetime] = None,
        resolution_date: tp.Optional[datetime] = None) -> PygitBug:
    """
    Create the bug corresponding to a given closing commit.

    Applies the simple SZZ algorithm as implemented in pydriller to find the
    introducing commits.

    Args:
        closing_commit: commit closing the bug
        project_repo: pygit2 repository of the project
        issue_id: optional issue number related to the bug
        creation_date: optional creation date of the bug's issue
        resolution_date: optional resolution date of the bug's issue

    Returns:
        the specified bug
    """
    pydrill_repo = pydriller.Git(project_repo.path)

    introducing_commits: tp.Set[pygit2.Commit] = set()
    blame_dict = pydrill_repo.get_commits_last_modified_lines(
        pydrill_repo.get_commit(str(closing_commit.id)))

    for _, introducing_set in blame_dict.items():
        for introducing_id in introducing_set:
            introducing_commits.add(project_repo.get(introducing_id))

    return PygitBug(closing_commit, introducing_commits, issue_id,
                    creation_date, resolution_date)
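A minimal usage sketch for the helper above, assuming a local clone at a placeholder path; the repository HEAD merely stands in for the commit that closed the bug:

import pygit2

project_repo = pygit2.Repository('/path/to/project')  # assumed local clone
closing_commit = project_repo.get(project_repo.head.target)  # stand-in commit
bug = _create_corresponding_bug(closing_commit, project_repo, issue_id=42)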
Example #3
def _find_corresponding_pygit_suspect_tuple(
        project_name: str,
        issue_event: IssueEvent) -> tp.Optional[PygitSuspectTuple]:
    """
    Creates a suspect tuple given an issue event.

    Partitions the commits found via git blame on the fixing commit into
    suspects (commits after bug report) and non-suspects (commits before bug
    report).

    Args:
        project_name: Name of the project to draw the fixing and introducing
            commits from.
        issue_event: The IssueEvent potentially associated with a bug.

    Returns:
        A PygitSuspectTuple if the issue event represents the closing of a bug,
        None otherwise
    """
    pygit_repo: pygit2.Repository = get_local_project_git(project_name)
    pydrill_repo = pydriller.Git(pygit_repo.path)

    if _has_closed_a_bug(issue_event) and issue_event.commit_id:
        # Convert the issue creation time to UTC once so it can be compared
        # with the UTC-normalised commit timestamps below.
        issue_date = issue_event.issue.created_at.astimezone(timezone.utc)
        fixing_commit = pygit_repo.get(issue_event.commit_id)
        pydrill_fixing_commit = pydrill_repo.get_commit(issue_event.commit_id)
        blame_dict = pydrill_repo.get_commits_last_modified_lines(
            pydrill_fixing_commit)

        non_suspect_commits = set()
        suspect_commits = set()
        for introducing_set in blame_dict.values():
            for introducing_id in introducing_set:
                introduction_date = pydrill_repo.get_commit(
                    introducing_id).committer_date.astimezone(timezone.utc)

                if introduction_date > issue_date:  # commit is a suspect
                    suspect_commits.add(pygit_repo.get(introducing_id))
                else:
                    non_suspect_commits.add(pygit_repo.get(introducing_id))

        return PygitSuspectTuple(fixing_commit, non_suspect_commits,
                                 suspect_commits, issue_event.issue.number,
                                 issue_event.issue.created_at,
                                 pydrill_fixing_commit.committer_date)
    return None
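Both timestamps are normalised to UTC before the comparison because issue and commit timestamps can carry different UTC offsets, and comparing naive with aware datetimes raises a TypeError. A small self-contained illustration of the suspect check:

from datetime import datetime, timedelta, timezone

issue_date = datetime(2021, 3, 1, 12, 0, tzinfo=timezone(timedelta(hours=2)))
commit_date = datetime(2021, 3, 1, 11, 30, tzinfo=timezone.utc)

# 12:00 at UTC+2 is 10:00 UTC, so the 11:30 UTC commit happened after the
# issue was reported and would be classified as a suspect.
is_suspect = (commit_date.astimezone(timezone.utc) >
              issue_date.astimezone(timezone.utc))
print(is_suspect)  # True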
Example #4
def test_extract_edits_2(git_repo_dir):
    commit_hash = 'b17c2c321ce8d299de3d063ca0a1b0b363477505'
    filename = 'first_lines.txt'

    extraction_settings = {
        'use_blocks': True,
        'blame_options':
        ['-C', '-C', '-C4', '--show-number', '--line-porcelain'],
        'extract_complexity': True,
        'extract_text': True
    }

    git_repo = pydriller.Git(git_repo_dir)
    commit = git_repo.get_commit(commit_hash)
    df = None
    for mod in commit.modified_files:
        if mod.filename == filename:
            df = git2net.extraction._extract_edits(git_repo, commit, mod,
                                                   extraction_settings)
    assert len(df) == 1
    assert df.at[0,
                 'original_commit_addition'] == 'not available with use_blocks'
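With use_blocks enabled, git2net groups consecutive changed lines into blocks, so the per-line origin of an addition is not traced and the original_commit_addition column carries a sentinel string instead of a commit hash.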
Example #5
def test_identify_edits(git_repo_dir):
    commit_hash = 'f343ed53ee64717f85135c4b8d3f6bd018be80ad'
    filename = 'text_file.txt'

    extraction_settings = {'use_blocks': False}

    git_repo = pydriller.Git(git_repo_dir)
    commit = git_repo.get_commit(commit_hash)
    for x in commit.modified_files:
        if x.filename == filename:
            mod = x

    parsed_lines = mod.diff_parsed

    deleted_lines = {x[0]: x[1] for x in parsed_lines['deleted']}
    added_lines = {x[0]: x[1] for x in parsed_lines['added']}

    _, edits = git2net.extraction._identify_edits(deleted_lines, added_lines,
                                                  extraction_settings)
    assert list(edits.type) == [
        'deletion', 'replacement', 'deletion', 'replacement', 'addition',
        'addition', 'addition'
    ]
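pydriller's ModifiedFile.diff_parsed is a dict with 'added' and 'deleted' lists of (line_number, content) tuples; the dict comprehensions above turn them into {line_number: content} maps before edit identification. A sketch with made-up content:

# Shape of diff_parsed, illustrated with dummy data.
parsed_lines = {
    'deleted': [(2, 'old line two'), (5, 'old line five')],
    'added': [(2, 'new line two'), (6, 'an extra line')],
}

deleted_lines = {no: content for no, content in parsed_lines['deleted']}
added_lines = {no: content for no, content in parsed_lines['added']}
print(deleted_lines)  # {2: 'old line two', 5: 'old line five'}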
Example #6
def _compute_complexity_measures(args):
    """
    Computes a set of complexity measures for a given commit/file combination.
    
    :param dict args: dictionary with the following key/value pairs:
        * **git_repo_dir** (*str*) – path to the git repository that is analysed
        * **commit_hash** (*str*) – hash of the commit that is processed
        * **old_path** (*str*) – path to the analysed file before the commit
        * **new_path** (*str*) – path to the analysed file after the commit
        * **events** (*int*) – number of edit events in the commit/file pair
        * **levenshtein_distance** (*int*) – total Levenshtein distance in the commit/file pair
    
    :return:
        *pandas.DataFrame* – dataframe containing identifying information and the computed
            complexity for the commit/file combination.
    """

    filename_old = args['old_path'].split('/')[-1]
    filename_new = args['new_path'].split('/')[-1]
    # Paths arrive as strings, so a file without a path after the commit
    # (e.g. a deleted file) carries the literal string 'None'.
    if filename_new != 'None':
        filename = filename_new
    else:
        filename = filename_old

    result = {
        'commit_hash': args['commit_hash'],
        'old_path': args['old_path'],
        'new_path': args['new_path'],
        'events': args['events'],
        'levenshtein_distance': args['levenshtein_distance'],
        'HE_pre': None,
        'CCN_pre': None,
        'NLOC_pre': None,
        'TOK_pre': None,
        'FUN_pre': None,
        'HE_post': None,
        'CCN_post': None,
        'NLOC_post': None,
        'TOK_post': None,
        'FUN_post': None,
        'HE_delta': None,
        'CCN_delta': None,
        'NLOC_delta': None,
        'TOK_delta': None,
        'FUN_delta': None
    }

    # git_init_lock is a module-level lock that serialises repository
    # initialisation when this function runs in parallel worker processes.
    with git_init_lock:
        pydriller_repo = pydriller.Git(args['git_repo_dir'])
        pydriller_commit = pydriller_repo.get_commit(args['commit_hash'])

    found = False
    for m in pydriller_commit.modified_files:
        if m.filename == filename:
            found = True
            break

    if found:
        if pd.notnull(m.source_code_before):
            result['HE_pre'] = _compute_halstead_effort(
                m.old_path, m.source_code_before)
            l_before = lizard.analyze_file.analyze_source_code(
                m.old_path, m.source_code_before)
            result['CCN_pre'] = l_before.CCN
            result['NLOC_pre'] = l_before.nloc
            result['TOK_pre'] = l_before.token_count
            result['FUN_pre'] = len(l_before.function_list)
        else:
            result['HE_pre'] = 0
            result['CCN_pre'] = 0
            result['NLOC_pre'] = 0
            result['TOK_pre'] = 0
            result['FUN_pre'] = 0

        if pd.notnull(m.source_code):
            result['HE_post'] = _compute_halstead_effort(
                m.new_path, m.source_code)
            l_after = lizard.analyze_file.analyze_source_code(
                m.new_path, m.source_code)
            result['CCN_post'] = l_after.CCN
            result['NLOC_post'] = l_after.nloc
            result['TOK_post'] = l_after.token_count
            result['FUN_post'] = len(l_after.function_list)
        else:
            result['HE_post'] = 0
            result['CCN_post'] = 0
            result['NLOC_post'] = 0
            result['TOK_post'] = 0
            result['FUN_post'] = 0

        # Deltas are only meaningful when the file was found in the commit;
        # otherwise the pre/post measures stay None.
        result['HE_delta'] = result['HE_post'] - result['HE_pre']
        result['CCN_delta'] = result['CCN_post'] - result['CCN_pre']
        result['NLOC_delta'] = result['NLOC_post'] - result['NLOC_pre']
        result['TOK_delta'] = result['TOK_post'] - result['TOK_pre']
        result['FUN_delta'] = result['FUN_post'] - result['FUN_pre']

    result_df = pd.DataFrame(result, index=[0])

    return result_df
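A minimal usage sketch for the function above; the repository location, commit hash and file paths are hypothetical placeholders:

args = {
    'git_repo_dir': '/path/to/repo',   # assumed repository location
    'commit_hash': '<commit hash>',    # hypothetical commit to analyse
    'old_path': 'src/module.py',       # hypothetical file paths
    'new_path': 'src/module.py',
    'events': 3,
    'levenshtein_distance': 42,
}
complexity_df = _compute_complexity_measures(args)
print(complexity_df[['commit_hash', 'CCN_pre', 'CCN_post', 'CCN_delta']])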