コード例 #1
0
ファイル: repository.py プロジェクト: Symbolk/pydriller
    def _prep_repo(self, path_repo: str) -> Generator[Git, None, None]:
        """Prepare one repository for analysis and tear it down afterwards.

        Clones the repository first when *path_repo* is remote, records the
        resolved local path and the Git handle in the configuration, yields
        the Git object to the caller, and releases everything once the
        caller is done with it.
        """
        is_remote = self._is_remote(path_repo)

        # Remote repositories are cloned into a temporary folder first.
        repo_folder = (self._clone_remote_repo(self._clone_folder(), path_repo)
                       if is_remote else path_repo)
        repo_folder = str(Path(repo_folder).expanduser().resolve())

        # When multiple repos are given in input, this value records which
        # one is currently being analyzed.
        self._conf.set_value('path_to_repo', repo_folder)

        self.git = Git(repo_folder, self._conf)
        # Save the Git object in the configuration for further use.
        self._conf.set_value("git", self.git)

        # Verify the configured filters are consistent before analysis.
        self._conf.sanity_check_filters()
        yield self.git

        # Cleanup is necessary because of GitPython memory-leak issues.
        self._conf.set_value("git", None)
        self.git.clear()
        self.git = None  # type: ignore

        # Delete the temporary clone directory, if one was created.
        if is_remote and self._cleanup is True:
            assert self._tmp_dir is not None
            try:
                self._tmp_dir.cleanup()
            except PermissionError:
                # On Windows, Python 3.5, 3.6, 3.7 cannot delete git
                # directories because of read-only files; in that case,
                # just ignore the errors.
                shutil.rmtree(self._tmp_dir.name, ignore_errors=True)
コード例 #2
0
def test_commit_in_master_branch(repo: Git):
    """Checking out another commit must not break in_main_branch detection,
    and reset() must restore the original head."""
    head_hash = '29e929fbc5dc6a2e9c620069b24e2a143af4285f'
    assert repo.get_head().hash == head_hash

    repo.checkout('8986af2a679759e5a15794f6d56e6d46c3f302f1')

    other_repo = Git('test-repos/branches_merged')
    branch_commit = other_repo.get_commit('8169f76a3d7add54b4fc7bca7160d1f1eede6eda')
    assert branch_commit.in_main_branch is False

    # Restoring the head must bring us back to the original commit.
    repo.reset()
    assert repo.get_head().hash == head_hash
コード例 #3
0
def test_changed_methods():
    """Verify the number of changed methods detected across a sequence of
    commits in the diff test repository."""
    gr = Git("test-repos/diff")

    # add a new method
    first_mod = gr.get_commit(
        'ea95227e0fd128aa69c7ab6a8ac485f72251b3ed').modified_files[0]
    assert len(first_mod.changed_methods) == 1
    assert first_mod.changed_methods[0].name == 'GitRepository::singleProjectThirdMethod'

    # (commit hash, expected number of changed methods)
    expectations = [
        # add 2 new methods
        ('d8eb8e80b671246a43c98d97b05f6d1c5ada14fb', 2),
        # remove one method
        ('0c8f9fdec926785198b399a2c49adb5884aa952c', 1),
        # add and remove one one method at different locations
        ('d8bb142c5616041b71cbfaa11eeb768d9a1a296e', 2),
        # add and remove one one method at the same location; this is
        # equivalent to replacing a method - although we expect 2 methods
        ('9e9473d5ca310b7663e9df93c402302b6b7f24aa', 2),
        # update a method
        ('b267a14e0503fdac36d280422f16360d1f661f12', 1),
        # update and add a new method
        ('2489099dfd90edb99ddc2c82b62524b66c07c687', 2),
        # update and delete methods
        ('5aebeb30e0238543a93e5bed806639481460cd9a', 2),
        # delete 3 methods (test cleanup - revert the test file to its
        # initial set of methods)
        ('9f6ddc2aac740a257af59a76860590cb8a84c77b', 3),
    ]

    for commit_hash, expected_count in expectations:
        mod = gr.get_commit(commit_hash).modified_files[0]
        assert len(mod.changed_methods) == expected_count
コード例 #4
0
def repo(request):
    """Fixture: yield a Git handle for the parametrized repository path."""
    git_handle = Git(request.param)
    yield git_handle
    # Release GitPython resources once the test finishes normally.
    git_handle.clear()
コード例 #5
0
    def extract(self,
                labeled_files: List[FailureProneFile],
                product: bool = True,
                process: bool = True,
                delta: bool = False):
        """ Extract metrics from labeled files.

        The result is stored in ``self.dataset``: a pandas DataFrame with
        one row per analyzed file per release/commit in ``self.commits_at``.

        Parameters
        ----------
        labeled_files : List[FailureProneFile]
            The list of FailureProneFile objects that are used to label a script as failure-prone (1) or clean (0).
        product: bool
            Whether to extract product metrics.
        process: bool
            Whether to extract process metrics.
        delta: bool
            Whether to extract delta metrics between two successive releases or commits.

        """
        self.dataset = pd.DataFrame()
        git_repo = Git(self.path_to_repo)

        # Accumulate rows in a list and build the DataFrame once at the end:
        # DataFrame.append() was removed in pandas 2.0 and, being copy-based,
        # made the original loop quadratic in the number of rows.
        rows = []

        metrics_previous_release = dict()  # Values for iac metrics in the last release

        for commit in Repository(self.path_to_repo, order='date-order', num_workers=1).traverse_commits():

            # To handle renaming in metrics_previous_release
            for modified_file in commit.modified_files:

                old_path = modified_file.old_path
                new_path = modified_file.new_path

                if old_path != new_path and old_path in metrics_previous_release:
                    # Rename key old_path with new_path
                    metrics_previous_release[new_path] = metrics_previous_release.pop(old_path)

            # Only the commits listed in self.commits_at are analyzed.
            if commit.hash not in self.commits_at:
                continue

            # Check out the commit so files on disk reflect its state.
            git_repo.checkout(commit.hash)
            process_metrics = {}

            if process:
                # Extract process metrics over the window from the previous
                # analyzed commit (or this one, for the first) to this commit.
                i = self.commits_at.index(commit.hash)
                from_previous_commit = commit.hash if i == 0 else self.commits_at[i - 1]
                to_current_commit = commit.hash  # = self.commits_at[i]
                process_metrics = self.get_process_metrics(from_previous_commit, to_current_commit)

            for filepath in self.get_files():

                file_content = get_content(os.path.join(self.path_to_repo, filepath))

                # Skip unreadable/empty files and files filtered out by the
                # subclass-specific ignore rule.
                if not file_content or self.ignore_file(filepath, file_content):
                    continue

                # A file is failure-prone (1) iff it is among labeled_files
                # for this commit, clean (0) otherwise.
                tmp = FailureProneFile(filepath=filepath, commit=commit.hash, fixing_commit='')
                label = 1 if tmp in labeled_files else 0

                metrics = dict(
                    filepath=filepath,
                    commit=commit.hash,
                    committed_at=str(commit.committer_date),
                    failure_prone=label
                )

                if process_metrics:
                    # Change-set metrics are repository-wide scalars.
                    metrics['change_set_max'] = process_metrics['dict_change_set_max']
                    metrics['change_set_avg'] = process_metrics['dict_change_set_avg']

                    # Per-file metrics default to 0 for files that did not
                    # change in the analyzed window.
                    per_file_keys = (
                        'code_churn_count', 'code_churn_max', 'code_churn_avg',
                        'commits_count', 'contributors_count',
                        'minor_contributors_count',
                        'highest_contributor_experience', 'hunks_median',
                        'additions', 'additions_max', 'additions_avg',
                        'deletions', 'deletions_max', 'deletions_avg')
                    for key in per_file_keys:
                        metrics[key] = process_metrics[f'dict_{key}'].get(filepath, 0)

                if product:
                    metrics.update(self.get_product_metrics(file_content))

                if delta:
                    delta_metrics = dict()

                    previous = metrics_previous_release.get(filepath, dict())
                    for metric, value in previous.items():

                        # Identification fields carry no numeric meaning.
                        if metric in ('filepath', 'commit', 'committed_at', 'failure_prone'):
                            continue

                        difference = metrics.get(metric, 0) - value
                        delta_metrics[f'delta_{metric}'] = round(difference, 3)

                    metrics_previous_release[filepath] = metrics.copy()
                    metrics.update(delta_metrics)

                rows.append(metrics)

            git_repo.reset()

        # Build the dataset in one shot (column union across rows, like the
        # removed DataFrame.append did with ignore_index=True).
        self.dataset = pd.DataFrame(rows)
コード例 #6
0
def repo():
    """Fixture: yield a Git handle for the DMM test repository."""
    git_handle = Git("test-repos/dmm-test-repo")
    yield git_handle
    # Release GitPython resources once the test finishes normally.
    git_handle.clear()