Esempio n. 1
0
def test_checkout_consecutive_commits(repo: Git):
    """Check out three commits back to back and verify the final file count.

    After the last checkout ``repo.files()`` must report exactly three
    entries; the repository is reset afterwards.
    """
    commit_hashes = (
        'a7053a4dcd627f5f4f213dc9aa002eb1caf926f8',
        'f0dd1308bd904a9b108a6a40865166ee962af3d4',
        '9e71dd5726d775fb4a5f08506a539216e878adbb',
    )
    # No reset between checkouts: the point is that they can be chained.
    for commit_hash in commit_hashes:
        repo.checkout(commit_hash)

    checked_out_files = repo.files()
    assert len(checked_out_files) == 3
    repo.reset()
Esempio n. 2
0
def test_commit_in_master_branch(repo: Git):
    """Verify ``in_main_branch`` is False for a commit read while HEAD is moved.

    The head hash is checked before the checkout and again after the reset,
    so the test also confirms that ``reset()`` restores the original head.
    """
    expected_head = '29e929fbc5dc6a2e9c620069b24e2a143af4285f'
    assert repo.get_head().hash == expected_head

    repo.checkout('8986af2a679759e5a15794f6d56e6d46c3f302f1')

    # Separate Git handle opened while `repo` is checked out elsewhere.
    # NOTE(review): presumably the same repository the fixture points to - confirm.
    other_git = Git('test-repos/branches_merged')
    inspected = other_git.get_commit('8169f76a3d7add54b4fc7bca7160d1f1eede6eda')
    assert inspected.in_main_branch is False

    repo.reset()
    assert repo.get_head().hash == expected_head
def test_checkout_with_commit_not_fully_merged_to_master(repo: Git):
    """Switch to 'developing' and back twice, checking the file count each time.

    'developing' is expected to list two files; after ``reset()`` one file is
    expected.
    """
    def file_count():
        # Query the repository afresh on every call.
        return len(repo.files())

    repo.checkout('developing')
    assert file_count() == 2

    repo.reset()
    assert file_count() == 1

    # A second checkout after the reset must behave like the first one.
    repo.checkout('developing')
    assert file_count() == 2
    repo.reset()
Esempio n. 4
0
def test_checkout_with_commit_not_fully_merged_to_master(repo: Git):
    """Switch to 'developing' and back, checking files and branch clean-up.

    'developing' is expected to list two files; after ``reset()`` one file is
    expected and the repository must have exactly four branches, i.e. no
    temporary branch is left behind by the checkout.
    """
    repo.checkout('developing')
    files1 = repo.files()
    assert len(files1) == 2

    repo.reset()
    # The condition must come first and the message second. The previous form
    # `assert 4, "..." == len(...)` asserted the constant 4 and therefore
    # could never fail.
    assert len(repo.repo.branches) == 4, "temp branch should be cleared."
    files2 = repo.files()
    assert len(files2) == 1

    repo.checkout('developing')
    files1 = repo.files()
    assert len(files1) == 2
    repo.reset()
Esempio n. 5
0
    def extract(self,
                labeled_files: List[FailureProneFile],
                product: bool = True,
                process: bool = True,
                delta: bool = False):
        """ Extract metrics from labeled files and store them in ``self.dataset``.

        Commits are traversed in date order. For every commit listed in
        ``self.commits_at`` the repository is checked out at that commit and
        one row of metrics is produced for each file returned by
        ``self.get_files()`` that has content and is not ignored.

        Parameters
        ----------
        labeled_files : List[FailureProneFile]
            The list of FailureProneFile objects that are used to label a script as failure-prone (1) or clean (0).
            A file is labeled 1 when ``FailureProneFile(filepath, commit, fixing_commit='')`` is found in this list.
        product: bool
            Whether to extract product metrics.
        process: bool
            Whether to extract process metrics.
        delta: bool
            Whether to extract delta metrics between two successive releases or commits.

        Notes
        -----
        Rows are collected in a list and converted to a DataFrame once, because
        ``DataFrame.append`` was removed in pandas 2.0 and copied the whole
        frame on every call. ``self.dataset`` is therefore only populated when
        the traversal ends (normally or through an exception).

        """
        self.dataset = pd.DataFrame()
        git_repo = Git(self.path_to_repo)

        metrics_previous_release = dict()  # Values for iac metrics in the last release
        rows = []  # One dict of metrics per (file, commit)

        # Process metrics that are looked up per file; each one is stored in
        # process_metrics under the key 'dict_<name>'. Order defines column order.
        per_file_process_metrics = (
            'code_churn_count',
            'code_churn_max',
            'code_churn_avg',
            'commits_count',
            'contributors_count',
            'minor_contributors_count',
            'highest_contributor_experience',
            'hunks_median',
            'additions',
            'additions_max',
            'additions_avg',
            'deletions',
            'deletions_max',
            'deletions_avg',
        )

        try:
            for commit in Repository(self.path_to_repo, order='date-order', num_workers=1).traverse_commits():

                # To handle renaming in metrics_previous_release
                for modified_file in commit.modified_files:

                    old_path = modified_file.old_path
                    new_path = modified_file.new_path

                    if old_path != new_path and old_path in metrics_previous_release:
                        # Rename key old_path with new_path
                        metrics_previous_release[new_path] = metrics_previous_release.pop(old_path)

                if commit.hash not in self.commits_at:
                    continue

                # Else
                git_repo.checkout(commit.hash)
                try:
                    process_metrics = {}

                    if process:
                        # Extract process metrics
                        i = self.commits_at.index(commit.hash)
                        from_previous_commit = commit.hash if i == 0 else self.commits_at[i - 1]
                        to_current_commit = commit.hash  # = self.commits_at[i]
                        process_metrics = self.get_process_metrics(from_previous_commit, to_current_commit)

                    for filepath in self.get_files():

                        file_content = get_content(os.path.join(self.path_to_repo, filepath))

                        if not file_content or self.ignore_file(filepath, file_content):
                            continue

                        tmp = FailureProneFile(filepath=filepath, commit=commit.hash, fixing_commit='')
                        label = 1 if tmp in labeled_files else 0  # 1: failure-prone, 0: clean

                        metrics = dict(
                            filepath=filepath,
                            commit=commit.hash,
                            committed_at=str(commit.committer_date),
                            failure_prone=label
                        )

                        if process_metrics:
                            # The change-set metrics are stored as-is, not looked up per file
                            metrics['change_set_max'] = process_metrics['dict_change_set_max']
                            metrics['change_set_avg'] = process_metrics['dict_change_set_avg']
                            for name in per_file_process_metrics:
                                metrics[name] = process_metrics[f'dict_{name}'].get(filepath, 0)

                        if product:
                            metrics.update(self.get_product_metrics(file_content))

                        if delta:
                            delta_metrics = dict()

                            previous = metrics_previous_release.get(filepath, dict())
                            for metric, value in previous.items():

                                if metric in ('filepath', 'commit', 'committed_at', 'failure_prone'):
                                    continue

                                difference = metrics.get(metric, 0) - value
                                delta_metrics[f'delta_{metric}'] = round(difference, 3)

                            # Store the values without deltas, so the next release is
                            # compared against raw metrics only
                            metrics_previous_release[filepath] = metrics.copy()
                            metrics.update(delta_metrics)

                        rows.append(metrics)
                finally:
                    # Always restore the repository, even if extraction fails midway
                    git_repo.reset()
        finally:
            # Keep whatever was collected, also when the traversal is interrupted
            if rows:
                self.dataset = pd.DataFrame(rows)