def test_checkout_consecutive_commits(repo: Git):
    """Checking out several commits back-to-back must leave the last one active."""
    for commit_hash in (
        'a7053a4dcd627f5f4f213dc9aa002eb1caf926f8',
        'f0dd1308bd904a9b108a6a40865166ee962af3d4',
        '9e71dd5726d775fb4a5f08506a539216e878adbb',
    ):
        repo.checkout(commit_hash)

    # The working tree at the last checked-out commit contains 3 files.
    assert len(repo.files()) == 3
    repo.reset()
def test_commit_in_master_branch(repo: Git):
    """Querying a second repository must not disturb the head of the first."""
    expected_head = '29e929fbc5dc6a2e9c620069b24e2a143af4285f'
    assert repo.get_head().hash == expected_head

    repo.checkout('8986af2a679759e5a15794f6d56e6d46c3f302f1')

    other_repo = Git('test-repos/branches_merged')
    commit = other_repo.get_commit('8169f76a3d7add54b4fc7bca7160d1f1eede6eda')
    assert commit.in_main_branch is False

    # Resetting must restore the original head of the first repository.
    repo.reset()
    assert repo.get_head().hash == expected_head
def test_checkout_with_commit_not_fully_merged_to_master(repo: Git):
    """Checkout of an unmerged branch, reset, then checkout again.

    NOTE(review): a function with this exact name is redefined later in the
    file, so pytest only collects the later definition — confirm intent.
    """
    repo.checkout('developing')
    assert len(repo.files()) == 2

    repo.reset()
    assert len(repo.files()) == 1

    # A second checkout after the reset must behave identically.
    repo.checkout('developing')
    assert len(repo.files()) == 2
    repo.reset()
def test_checkout_with_commit_not_fully_merged_to_master(repo: Git):
    """Checkout of an unmerged branch; reset must clear the temporary branch.

    NOTE(review): this redefines the function of the same name above, so only
    this definition is collected by pytest — confirm intent.
    """
    repo.checkout('developing')
    files1 = repo.files()
    assert len(files1) == 2
    repo.reset()
    # BUG FIX: the original `assert 4, "..." == len(...)` asserted the
    # constant 4 (always true) with the comparison folded into the message.
    # The intended check is that the temporary branch was removed, leaving
    # the expected branch count.
    assert len(repo.repo.branches) == 4, "temp branch should be cleared."
    files2 = repo.files()
    assert len(files2) == 1
    repo.checkout('developing')
    files1 = repo.files()
    assert len(files1) == 2
    repo.reset()
def extract(self, labeled_files: List[FailureProneFile], product: bool = True, process: bool = True,
            delta: bool = False):
    """
    Extract metrics from labeled files.

    Parameters
    ----------
    labeled_files : List[FailureProneFile]
        The list of FailureProneFile objects that are used to label a script
        as failure-prone (1) or clean (0).
    product: bool
        Whether to extract product metrics.
    process: bool
        Whether to extract process metrics.
    delta: bool
        Whether to extract delta metrics between two successive releases or
        commits.
    """
    self.dataset = pd.DataFrame()
    git_repo = Git(self.path_to_repo)

    metrics_previous_release = dict()  # Values for iac metrics in the last release

    for commit in Repository(self.path_to_repo, order='date-order', num_workers=1).traverse_commits():

        # To handle renaming in metrics_previous_release
        for modified_file in commit.modified_files:
            old_path = modified_file.old_path
            new_path = modified_file.new_path

            if old_path != new_path and old_path in metrics_previous_release:
                # Rename key old_path with new_path
                metrics_previous_release[new_path] = metrics_previous_release.pop(old_path)

        if commit.hash not in self.commits_at:
            continue

        # Else: this is one of the snapshots we extract metrics at.
        git_repo.checkout(commit.hash)

        process_metrics = {}
        if process:
            # Extract process metrics over the window ending at this commit.
            i = self.commits_at.index(commit.hash)
            from_previous_commit = commit.hash if i == 0 else self.commits_at[i - 1]
            to_current_commit = commit.hash  # = self.commits_at[i]
            process_metrics = self.get_process_metrics(from_previous_commit, to_current_commit)

        for filepath in self.get_files():
            file_content = get_content(os.path.join(self.path_to_repo, filepath))
            if not file_content or self.ignore_file(filepath, file_content):
                continue

            tmp = FailureProneFile(filepath=filepath, commit=commit.hash, fixing_commit='')
            # 1 = failure-prone, 0 = clean
            label = 1 if tmp in labeled_files else 0

            metrics = dict(
                filepath=filepath,
                commit=commit.hash,
                committed_at=str(commit.committer_date),
                failure_prone=label
            )

            if process_metrics:
                # change_set_* are commit-wide values; the others are per-file.
                metrics['change_set_max'] = process_metrics['dict_change_set_max']
                metrics['change_set_avg'] = process_metrics['dict_change_set_avg']
                metrics['code_churn_count'] = process_metrics['dict_code_churn_count'].get(filepath, 0)
                metrics['code_churn_max'] = process_metrics['dict_code_churn_max'].get(filepath, 0)
                metrics['code_churn_avg'] = process_metrics['dict_code_churn_avg'].get(filepath, 0)
                metrics['commits_count'] = process_metrics['dict_commits_count'].get(filepath, 0)
                metrics['contributors_count'] = process_metrics['dict_contributors_count'].get(filepath, 0)
                metrics['minor_contributors_count'] = process_metrics['dict_minor_contributors_count'].get(filepath, 0)
                metrics['highest_contributor_experience'] = process_metrics[
                    'dict_highest_contributor_experience'].get(filepath, 0)
                metrics['hunks_median'] = process_metrics['dict_hunks_median'].get(filepath, 0)
                metrics['additions'] = process_metrics['dict_additions'].get(filepath, 0)
                metrics['additions_max'] = process_metrics['dict_additions_max'].get(filepath, 0)
                metrics['additions_avg'] = process_metrics['dict_additions_avg'].get(filepath, 0)
                metrics['deletions'] = process_metrics['dict_deletions'].get(filepath, 0)
                metrics['deletions_max'] = process_metrics['dict_deletions_max'].get(filepath, 0)
                metrics['deletions_avg'] = process_metrics['dict_deletions_avg'].get(filepath, 0)

            if product:
                metrics.update(self.get_product_metrics(file_content))

            if delta:
                delta_metrics = dict()

                previous = metrics_previous_release.get(filepath, dict())
                for metric, value in previous.items():
                    if metric in ('filepath', 'commit', 'committed_at', 'failure_prone'):
                        continue

                    difference = metrics.get(metric, 0) - value
                    delta_metrics[f'delta_{metric}'] = round(difference, 3)

                metrics_previous_release[filepath] = metrics.copy()
                metrics.update(delta_metrics)

            # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and
            # removed in 2.0; build a one-row frame and concatenate instead.
            self.dataset = pd.concat([self.dataset, pd.DataFrame([metrics])], ignore_index=True)

        # Undo the checkout before moving to the next snapshot.
        # NOTE(review): the flattened source makes the indentation of this
        # call ambiguous; placed inside the commit loop to pair with the
        # checkout above — confirm against the original layout.
        git_repo.reset()