def _prep_repo(self, path_repo: str) -> Generator[Git, None, None]:
    """
    Open the repository at ``path_repo`` and yield a ready-to-use Git object.

    If ``path_repo`` is remote (according to ``self._is_remote``) it is first
    cloned via ``self._clone_remote_repo`` into ``self._clone_folder()``. The
    resolved local path and the Git object are stored in ``self._conf`` while
    the repository is in use.

    The teardown is executed in a ``finally`` clause, so the Git object is
    cleared and the temporary clone is removed even when the consumer raises
    or closes the generator before it is exhausted.

    :param str path_repo: local path or remote URL of the repository
    :return: generator yielding exactly one ``Git`` object
    """
    # evaluated once: the same answer drives both the clone and the cleanup
    is_remote = self._is_remote(path_repo)

    local_path_repo = path_repo
    if is_remote:
        local_path_repo = self._clone_remote_repo(self._clone_folder(), path_repo)
    local_path_repo = str(Path(local_path_repo).expanduser().resolve())

    # when multiple repos are given in input, this variable will serve as a reminder
    # of which one we are currently analyzing
    self._conf.set_value('path_to_repo', local_path_repo)

    self.git = Git(local_path_repo, self._conf)
    try:
        # saving the Git object for further use
        self._conf.set_value("git", self.git)

        # checking that the filters are set correctly
        self._conf.sanity_check_filters()
        yield self.git
    finally:
        # cleaning, this is necessary since GitPython issues on memory leaks
        self._conf.set_value("git", None)
        self.git.clear()
        self.git = None  # type: ignore

        # delete the temporary directory if created
        if is_remote and self._cleanup is True:
            # NOTE(review): presumably set by self._clone_folder() - confirm
            assert self._tmp_dir is not None
            try:
                self._tmp_dir.cleanup()
            except PermissionError:
                # on Windows, Python 3.5, 3.6, 3.7 are not able to delete
                # git directories because of read-only files.
                # In this case, just ignore the errors.
                shutil.rmtree(self._tmp_dir.name, ignore_errors=True)
def test_commit_in_master_branch(repo: Git):
    """
    A commit must not be reported as being in the main branch while the
    repository is checked out on a different commit.

    The checkout is undone in a ``finally`` clause so that a failing
    assertion cannot leave the shared test repository on the wrong commit,
    and the second Git object opened here is cleared once it is no longer
    needed.
    """
    head_hash = '29e929fbc5dc6a2e9c620069b24e2a143af4285f'
    assert repo.get_head().hash == head_hash

    repo.checkout('8986af2a679759e5a15794f6d56e6d46c3f302f1')
    try:
        git_to_change_head = Git('test-repos/branches_merged')
        try:
            commit = git_to_change_head.get_commit('8169f76a3d7add54b4fc7bca7160d1f1eede6eda')
            assert commit.in_main_branch is False
        finally:
            # release the second handle, as the fixtures do for theirs
            git_to_change_head.clear()
    finally:
        # always restore the repository, even if an assertion above failed
        repo.reset()

    assert repo.get_head().hash == head_hash
def test_changed_methods():
    """
    Check how many methods are reported as changed for a sequence of commits
    in the ``test-repos/diff`` repository.
    """
    gr = Git("test-repos/diff")

    # add a new method: besides the count, the method name is verified too
    first_file = gr.get_commit(
        'ea95227e0fd128aa69c7ab6a8ac485f72251b3ed').modified_files[0]
    assert len(first_file.changed_methods) == 1
    assert first_file.changed_methods[0].name == 'GitRepository::singleProjectThirdMethod'

    # (commit hash, expected number of changed methods), in the original order
    expectations = (
        # add 2 new methods
        ('d8eb8e80b671246a43c98d97b05f6d1c5ada14fb', 2),
        # remove one method
        ('0c8f9fdec926785198b399a2c49adb5884aa952c', 1),
        # add and remove one method at different locations
        ('d8bb142c5616041b71cbfaa11eeb768d9a1a296e', 2),
        # add and remove one method at the same location
        # this is equivalent to replacing a method - although we expect 2 methods
        ('9e9473d5ca310b7663e9df93c402302b6b7f24aa', 2),
        # update a method
        ('b267a14e0503fdac36d280422f16360d1f661f12', 1),
        # update and add a new method
        ('2489099dfd90edb99ddc2c82b62524b66c07c687', 2),
        # update and delete methods
        ('5aebeb30e0238543a93e5bed806639481460cd9a', 2),
        # delete 3 methods (test cleanup - revert the test file to its
        # initial set of methods)
        ('9f6ddc2aac740a257af59a76860590cb8a84c77b', 3),
    )

    for commit_hash, expected_count in expectations:
        changed_file = gr.get_commit(commit_hash).modified_files[0]
        assert len(changed_file.changed_methods) == expected_count
def repo(request):
    """
    Yield a Git object opened on the path given by ``request.param`` and
    clear it once the consumer is done with it.
    """
    # NOTE(review): presumably registered as a parametrized pytest fixture
    # by a decorator outside this view - confirm
    git_handle = Git(request.param)
    yield git_handle
    # teardown: release whatever the Git object is holding
    git_handle.clear()
def extract(self, labeled_files: List[FailureProneFile], product: bool = True, process: bool = True, delta: bool = False):
    """
    Extract metrics from labeled files.

    The repository is traversed in date order; for every commit listed in
    ``self.commits_at`` the working tree is checked out, one row of metrics
    is computed per non-ignored file, and the working tree is reset again
    (also when an error occurs). The collected rows are stored in
    ``self.dataset`` once the traversal has finished.

    Parameters
    ----------
    labeled_files : List[FailureProneFile]
        The list of FailureProneFile objects that are used to label a script as failure-prone (1) or clean (0).
    product: bool
        Whether to extract product metrics.
    process: bool
        Whether to extract process metrics.
    delta: bool
        Whether to extract delta metrics between two successive releases or commits.
    """
    # Process metrics stored per file: each one is read from
    # process_metrics['dict_<name>'] and defaults to 0 for unknown files.
    # The order is kept as it determines the column order of the dataset.
    per_file_process_metrics = (
        'code_churn_count',
        'code_churn_max',
        'code_churn_avg',
        'commits_count',
        'contributors_count',
        'minor_contributors_count',
        'highest_contributor_experience',
        'hunks_median',
        'additions',
        'additions_max',
        'additions_avg',
        'deletions',
        'deletions_max',
        'deletions_avg',
    )
    # Bookkeeping entries of a row that are not numeric metrics
    non_metric_keys = ('filepath', 'commit', 'committed_at', 'failure_prone')

    self.dataset = pd.DataFrame()
    # Rows are accumulated here and turned into a DataFrame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    git_repo = Git(self.path_to_repo)

    metrics_previous_release = dict()  # Values for iac metrics in the last release

    for commit in Repository(self.path_to_repo, order='date-order', num_workers=1).traverse_commits():

        # To handle renaming in metrics_previous_release
        for modified_file in commit.modified_files:
            old_path = modified_file.old_path
            new_path = modified_file.new_path

            if old_path != new_path and old_path in metrics_previous_release:
                # Rename key old_path with new_path
                metrics_previous_release[new_path] = metrics_previous_release.pop(old_path)

        if commit.hash not in self.commits_at:
            continue

        git_repo.checkout(commit.hash)
        try:
            process_metrics = {}

            if process:
                # Extract process metrics
                i = self.commits_at.index(commit.hash)
                from_previous_commit = commit.hash if i == 0 else self.commits_at[i - 1]
                to_current_commit = commit.hash  # = self.commits_at[i]
                process_metrics = self.get_process_metrics(from_previous_commit, to_current_commit)

            for filepath in self.get_files():

                file_content = get_content(os.path.join(self.path_to_repo, filepath))

                if not file_content or self.ignore_file(filepath, file_content):
                    continue

                tmp = FailureProneFile(filepath=filepath, commit=commit.hash, fixing_commit='')

                metrics = dict(
                    filepath=filepath,
                    commit=commit.hash,
                    committed_at=str(commit.committer_date),
                    failure_prone=1 if tmp in labeled_files else 0  # 1 = failure-prone, 0 = clean
                )

                if process_metrics:
                    # Change-set metrics are read as-is, not looked up per file
                    metrics['change_set_max'] = process_metrics['dict_change_set_max']
                    metrics['change_set_avg'] = process_metrics['dict_change_set_avg']
                    for name in per_file_process_metrics:
                        metrics[name] = process_metrics[f'dict_{name}'].get(filepath, 0)

                if product:
                    metrics.update(self.get_product_metrics(file_content))

                if delta:
                    delta_metrics = dict()

                    previous = metrics_previous_release.get(filepath, dict())
                    for metric, value in previous.items():
                        if metric in non_metric_keys:
                            continue

                        difference = metrics.get(metric, 0) - value
                        delta_metrics[f'delta_{metric}'] = round(difference, 3)

                    # Snapshot taken before the deltas are merged in, so the
                    # next release is compared against raw metrics only
                    metrics_previous_release[filepath] = metrics.copy()
                    metrics.update(delta_metrics)

                rows.append(metrics)
        finally:
            # Undo the checkout even if metric extraction failed
            git_repo.reset()

    self.dataset = pd.DataFrame(rows)
def repo():
    """
    Yield a Git object opened on ``test-repos/dmm-test-repo`` and clear it
    once the consumer is done with it.
    """
    # NOTE(review): presumably registered as a pytest fixture by a decorator
    # outside this view - confirm
    dmm_repo = Git("test-repos/dmm-test-repo")
    yield dmm_repo
    # teardown: release whatever the Git object is holding
    dmm_repo.clear()