import typing as tp
from datetime import datetime, timezone

import lizard
import pandas as pd
import pydriller
import pygit2

import git2net
# Project-internal names such as PygitBug, PygitSuspectTuple, IssueEvent,
# get_local_project_git, _has_closed_a_bug, git_init_lock and
# _compute_halstead_effort are assumed to be in scope.


def test_extract_edits_1(git_repo_dir):
    commit_hash = 'b17c2c321ce8d299de3d063ca0a1b0b363477505'
    filename = 'first_lines.txt'
    extraction_settings = {
        'use_blocks': False,
        'blame_options': ['-C', '-C', '-C4', '--show-number', '--line-porcelain'],
        'extract_complexity': True,
        'extract_text': True
    }

    git_repo = pydriller.Git(git_repo_dir)
    commit = git_repo.get_commit(commit_hash)

    for mod in commit.modified_files:
        if mod.filename == filename:
            df = git2net.extraction._extract_edits(git_repo, commit, mod,
                                                   extraction_settings)

    assert len(df) == 3
    assert df.at[0, 'original_commit_addition'] == \
        'e4448e87541d19d139b9d033b2578941a53d1f97'
    assert df.at[1, 'original_commit_addition'] == \
        '6b531fcb57d5b9d98dd983cb65357d82ccca647b'
    # No originating commit is found for the last line due to a line-ending
    # mismatch.
    assert df.at[2, 'original_commit_addition'] is None
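
# The tests in this file assume a `git_repo_dir` pytest fixture that points
# at a local clone of the test repository. A minimal sketch of such a
# fixture follows; the clone URL is a hypothetical placeholder, not part of
# the original code.
import pytest


@pytest.fixture
def git_repo_dir(tmp_path):
    repo_url = 'https://github.com/example/test-repo'  # hypothetical URL
    repo_dir = tmp_path / 'test_repo'
    pygit2.clone_repository(repo_url, str(repo_dir))
    return str(repo_dir)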

def _create_corresponding_bug(
        closing_commit: pygit2.Commit,
        project_repo: pygit2.Repository,
        issue_id: tp.Optional[int] = None,
        creation_date: tp.Optional[datetime] = None,
        resolution_date: tp.Optional[datetime] = None) -> PygitBug:
    """
    Create the bug corresponding to a given closing commit.

    Applies the simple SZZ algorithm as implemented in pydriller to find the
    introducing commits.

    Args:
        closing_commit: commit closing the bug
        project_repo: pygit2 repository of the project
        issue_id: optional issue number related to the bug
        creation_date: optional creation date of the related issue
        resolution_date: optional resolution date of the related issue

    Returns:
        the specified bug
    """
    pydrill_repo = pydriller.Git(project_repo.path)

    introducing_commits: tp.Set[pygit2.Commit] = set()
    blame_dict = pydrill_repo.get_commits_last_modified_lines(
        pydrill_repo.get_commit(str(closing_commit.id)))

    for _, introducing_set in blame_dict.items():
        for introducing_id in introducing_set:
            introducing_commits.add(project_repo.get(introducing_id))

    return PygitBug(closing_commit, introducing_commits, issue_id,
                    creation_date, resolution_date)
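
# Hedged usage sketch for _create_corresponding_bug; the repository path,
# commit hash, and issue number below are placeholders, not part of the
# original code.
def _demo_create_bug() -> None:
    repo = pygit2.Repository('/path/to/project')
    fixing_commit = repo.get('0123456789abcdef0123456789abcdef01234567')
    bug = _create_corresponding_bug(fixing_commit, repo, issue_id=42)
    # PygitBug is project-internal; here we only assume it can be printed.
    print(bug)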

def _find_corresponding_pygit_suspect_tuple(
        project_name: str,
        issue_event: IssueEvent) -> tp.Optional[PygitSuspectTuple]:
    """
    Create a suspect tuple for a given issue event.

    Partitions the commits found via git blame on the fixing commit into
    suspects (commits authored after the bug report) and non-suspects
    (commits authored before the bug report).

    Args:
        project_name: name of the project to draw the fixing and introducing
            commits from
        issue_event: the IssueEvent potentially associated with a bug

    Returns:
        a PygitSuspectTuple if the issue event represents the closing of a
        bug, None otherwise
    """
    pygit_repo: pygit2.Repository = get_local_project_git(project_name)
    pydrill_repo = pydriller.Git(pygit_repo.path)

    if _has_closed_a_bug(issue_event) and issue_event.commit_id:
        fixing_commit = pygit_repo.get(issue_event.commit_id)
        pydrill_fixing_commit = pydrill_repo.get_commit(issue_event.commit_id)
        blame_dict = pydrill_repo.get_commits_last_modified_lines(
            pydrill_fixing_commit)

        # The issue's creation date separates suspects from non-suspects.
        issue_date = issue_event.issue.created_at.astimezone(timezone.utc)

        non_suspect_commits = set()
        suspect_commits = set()
        for introducing_set in blame_dict.values():
            for introducing_id in introducing_set:
                introduction_date = pydrill_repo.get_commit(
                    introducing_id).committer_date.astimezone(timezone.utc)

                if introduction_date > issue_date:  # commit is a suspect
                    suspect_commits.add(pygit_repo.get(introducing_id))
                else:
                    non_suspect_commits.add(pygit_repo.get(introducing_id))

        return PygitSuspectTuple(fixing_commit, non_suspect_commits,
                                 suspect_commits, issue_event.issue.number,
                                 issue_event.issue.created_at,
                                 pydrill_fixing_commit.committer_date)
    return None
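
# The suspect partition above reduces to a single timestamp comparison in
# UTC. A tiny self-contained illustration with synthetic dates (purely
# illustrative, not taken from the original):
def _demo_suspect_partition() -> None:
    report_date = datetime(2021, 3, 1, tzinfo=timezone.utc)
    commit_dates = {
        'abc123': datetime(2021, 2, 15, tzinfo=timezone.utc),  # before report
        'def456': datetime(2021, 3, 10, tzinfo=timezone.utc),  # after report
    }
    suspects = {cid for cid, date in commit_dates.items()
                if date > report_date}
    assert suspects == {'def456'}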

def test_extract_edits_2(git_repo_dir):
    commit_hash = 'b17c2c321ce8d299de3d063ca0a1b0b363477505'
    filename = 'first_lines.txt'
    extraction_settings = {
        'use_blocks': True,
        'blame_options': ['-C', '-C', '-C4', '--show-number', '--line-porcelain'],
        'extract_complexity': True,
        'extract_text': True
    }

    git_repo = pydriller.Git(git_repo_dir)
    commit = git_repo.get_commit(commit_hash)

    df = None
    for mod in commit.modified_files:
        if mod.filename == filename:
            df = git2net.extraction._extract_edits(git_repo, commit, mod,
                                                   extraction_settings)

    assert len(df) == 1
    assert df.at[0, 'original_commit_addition'] == 'not available with use_blocks'

def test_identify_edits(git_repo_dir):
    commit_hash = 'f343ed53ee64717f85135c4b8d3f6bd018be80ad'
    filename = 'text_file.txt'
    extraction_settings = {'use_blocks': False}

    git_repo = pydriller.Git(git_repo_dir)
    commit = git_repo.get_commit(commit_hash)

    for x in commit.modified_files:
        if x.filename == filename:
            mod = x

    parsed_lines = mod.diff_parsed
    deleted_lines = {x[0]: x[1] for x in parsed_lines['deleted']}
    added_lines = {x[0]: x[1] for x in parsed_lines['added']}

    _, edits = git2net.extraction._identify_edits(deleted_lines, added_lines,
                                                  extraction_settings)
    assert list(edits.type) == [
        'deletion', 'replacement', 'deletion', 'replacement', 'addition',
        'addition', 'addition'
    ]
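
# For reference, pydriller's `diff_parsed` returns a dict with 'added' and
# 'deleted' lists of (line_number, content) tuples, so the comprehensions
# above map line numbers to line content. Synthetic example (values are
# illustrative only):
def _demo_diff_parsed() -> None:
    parsed_lines = {'deleted': [(3, 'old text')], 'added': [(3, 'new text')]}
    deleted_lines = {no: line for no, line in parsed_lines['deleted']}
    added_lines = {no: line for no, line in parsed_lines['added']}
    assert deleted_lines == {3: 'old text'}
    assert added_lines == {3: 'new text'}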

def _compute_complexity_measures(args):
    """
    Computes a set of complexity measures for a given commit/file combination.

    :param dict args: dictionary with the following key/value pairs:

        * **git_repo_dir** (*str*) – path to the git repository that is analysed
        * **commit_hash** (*str*) – hash of the commit that is processed
        * **old_path** (*str*) – path to the analysed file before the commit
        * **new_path** (*str*) – path to the analysed file after the commit
        * **events** (*int*) – number of edit events in the commit/file pair
        * **levenshtein_distance** (*int*) – total Levenshtein distance in the
          commit/file pair

    :return: *pandas.DataFrame* – dataframe containing identifying information
        and the computed complexity for the commit/file combination.
    """
    filename_old = args['old_path'].split('/')[-1]
    filename_new = args['new_path'].split('/')[-1]

    # Paths arrive as strings, so a missing new path is the string 'None'.
    if filename_new != 'None':
        filename = filename_new
    else:
        filename = filename_old

    result = {'commit_hash': args['commit_hash'],
              'old_path': args['old_path'],
              'new_path': args['new_path'],
              'events': args['events'],
              'levenshtein_distance': args['levenshtein_distance'],
              'HE_pre': None, 'CCN_pre': None, 'NLOC_pre': None,
              'TOK_pre': None, 'FUN_pre': None,
              'HE_post': None, 'CCN_post': None, 'NLOC_post': None,
              'TOK_post': None, 'FUN_post': None,
              'HE_delta': None, 'CCN_delta': None, 'NLOC_delta': None,
              'TOK_delta': None, 'FUN_delta': None}

    # git_init_lock serialises repository initialisation across workers.
    with git_init_lock:
        pydriller_repo = pydriller.Git(args['git_repo_dir'])
        pydriller_commit = pydriller_repo.get_commit(args['commit_hash'])

    found = False
    for m in pydriller_commit.modified_files:
        if m.filename == filename:
            found = True
            break

    if found:
        if pd.notnull(m.source_code_before):
            result['HE_pre'] = _compute_halstead_effort(m.old_path,
                                                        m.source_code_before)
            l_before = lizard.analyze_file.analyze_source_code(
                m.old_path, m.source_code_before)
            result['CCN_pre'] = l_before.CCN
            result['NLOC_pre'] = l_before.nloc
            result['TOK_pre'] = l_before.token_count
            result['FUN_pre'] = len(l_before.function_list)
        else:
            result['HE_pre'] = 0
            result['CCN_pre'] = 0
            result['NLOC_pre'] = 0
            result['TOK_pre'] = 0
            result['FUN_pre'] = 0

        if pd.notnull(m.source_code):
            result['HE_post'] = _compute_halstead_effort(m.new_path,
                                                         m.source_code)
            l_after = lizard.analyze_file.analyze_source_code(m.new_path,
                                                              m.source_code)
            result['CCN_post'] = l_after.CCN
            result['NLOC_post'] = l_after.nloc
            result['TOK_post'] = l_after.token_count
            result['FUN_post'] = len(l_after.function_list)
        else:
            result['HE_post'] = 0
            result['CCN_post'] = 0
            result['NLOC_post'] = 0
            result['TOK_post'] = 0
            result['FUN_post'] = 0

        result['HE_delta'] = result['HE_post'] - result['HE_pre']
        result['CCN_delta'] = result['CCN_post'] - result['CCN_pre']
        result['NLOC_delta'] = result['NLOC_post'] - result['NLOC_pre']
        result['TOK_delta'] = result['TOK_post'] - result['TOK_pre']
        result['FUN_delta'] = result['FUN_post'] - result['FUN_pre']

    result_df = pd.DataFrame(result, index=[0])
    return result_df
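
# Hedged usage sketch for _compute_complexity_measures; the paths, hash,
# event count, and Levenshtein distance below are placeholders. In git2net
# these values come from the edit-extraction step.
def _demo_complexity() -> None:
    args = {
        'git_repo_dir': '/path/to/repo',
        'commit_hash': '0123456789abcdef0123456789abcdef01234567',
        'old_path': 'src/module.py',
        'new_path': 'src/module.py',
        'events': 4,
        'levenshtein_distance': 17,
    }
    complexity_df = _compute_complexity_measures(args)
    print(complexity_df[['commit_hash', 'CCN_pre', 'CCN_post', 'CCN_delta']])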