Esempio n. 1
0
def test_only_commits():
    """Check that ``only_commits`` restricts traversal to the listed hashes."""
    repo = 'test-repos/complex_repo'

    # One requested hash -> exactly that commit.
    wanted = ["9e71dd5726d775fb4a5f08506a539216e878adbb"]
    found = list(RepositoryMining(repo, only_commits=wanted).traverse_commits())
    assert len(found) == 1
    assert found[0].hash == "9e71dd5726d775fb4a5f08506a539216e878adbb"

    # Two requested hashes; the expected order below is the reverse of the
    # order in which they are passed to the filter.
    wanted = ["953737b199de233896f00b4d87a0bc2794317253",
              "ffccf1e7497eb8136fd66ed5e42bef29677c4b71"]
    found = list(RepositoryMining(repo, only_commits=wanted).traverse_commits())
    assert len(found) == 2
    assert [c.hash for c in found] == [
        "ffccf1e7497eb8136fd66ed5e42bef29677c4b71",
        "953737b199de233896f00b4d87a0bc2794317253",
    ]

    # Three requested hashes, expected back in the same order as given.
    wanted = ["866e997a9e44cb4ddd9e00efe49361420aff2559",
              "57dbd017d1a744b949e7ca0b1c1a3b3dd4c1cbc1",
              "e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2"]
    found = list(RepositoryMining(repo, only_commits=wanted).traverse_commits())
    assert len(found) == 3
    for commit, sha in zip(found, wanted):
        assert commit.hash == sha

    # A hash that matches nothing yields no commits.
    found = list(RepositoryMining(repo, only_commits=["fake hash"]).traverse_commits())
    assert len(found) == 0

    # Without the filter the whole repository is traversed.
    total_commits = sum(1 for _ in RepositoryMining(repo).traverse_commits())

    assert total_commits == 13
Esempio n. 2
0
def test_multiple_filters_exceptions():
    """Check that each listed combination of filters makes the constructor raise."""
    from_commit = '6411e3096dd2070438a17b225f44475136e54e3a'
    from_tag = 'v1.4'
    repo = 'test-repos/test1/'

    # Each entry is built lazily so that everything, including argument
    # evaluation, happens inside the ``pytest.raises`` block.
    conflicting_calls = (
        lambda: RepositoryMining(repo, from_commit=from_commit, from_tag=from_tag),
        lambda: RepositoryMining(repo, since=dt2, from_commit=from_commit),
        lambda: RepositoryMining(repo, since=dt2, from_tag=from_tag),
        lambda: RepositoryMining(repo, to=dt2, to_tag=from_tag),
        lambda: RepositoryMining(repo, single=from_commit, to=dt2, to_tag=from_tag),
    )

    for build in conflicting_calls:
        with pytest.raises(Exception):
            build()
Esempio n. 3
0
def test_only_in_branches():
    """Check the commit counts seen with and without ``only_in_branch``."""
    repo = 'test-repos/test8/'

    # by default, only analyze master
    on_default = list(RepositoryMining(repo).traverse_commits())
    assert 3 == len(on_default)

    # only analyze b2
    on_b2 = list(RepositoryMining(repo, only_in_branch='b2').traverse_commits())
    assert 4 == len(on_b2)

    # only analyze b1
    on_b1 = list(RepositoryMining(repo, only_in_branch='b1').traverse_commits())
    assert 5 == len(on_b1)
Esempio n. 4
0
def test_single_commit_head():
    """Check that ``single="HEAD"`` yields the same commit as the explicit hash."""
    repo = 'test-repos/complex_repo'

    by_hash = RepositoryMining(repo, single="e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2")
    lc = list(by_hash.traverse_commits())
    assert len(lc) == 1

    by_ref = RepositoryMining(repo, single="HEAD")
    lc_head = list(by_ref.traverse_commits())
    assert len(lc_head) == 1

    assert lc[0].hash == lc_head[0].hash
Esempio n. 5
0
def test_single_commit():
    """Check that ``single`` yields exactly the one requested commit."""
    requested = ("866e997a9e44cb4ddd9e00efe49361420aff2559",
                 "ffccf1e7497eb8136fd66ed5e42bef29677c4b71")

    for sha in requested:
        miner = RepositoryMining('test-repos/complex_repo', single=sha)
        found = list(miner.traverse_commits())
        assert len(found) == 1
        assert found[0].hash == sha
Esempio n. 6
0
def test_only_authors():
    """Check the number of commits returned for each ``only_authors`` filter."""
    expected_counts = (("Maurício Aniche", 4), ("ishepard", 1))

    for author, expected in expected_counts:
        miner = RepositoryMining('test-repos/git-10/', only_authors=[author])
        found = list(miner.traverse_commits())
        assert len(found) == expected
Esempio n. 7
0
def test_single_commit():
    """Check that ``single`` yields exactly the one requested commit."""
    requested = ("4e669cb4f69245dc669e116517d80d038d8e0434",
                 "168b3aab057ed61a769acf336a4ef5e64f76c9fd")

    for sha in requested:
        miner = RepositoryMining('test-repos/git-10/', single=sha)
        found = list(miner.traverse_commits())
        assert len(found) == 1
        assert found[0].hash == sha
Esempio n. 8
0
def test_only_in_branches():
    """Check the commit counts seen with and without ``only_in_branch``."""
    repo = 'test-repos/branches_not_merged'

    # by default, only analyze master
    on_default = list(RepositoryMining(repo).traverse_commits())
    assert len(on_default) == 3

    # only analyze b2
    on_b2 = list(RepositoryMining(repo, only_in_branch='b2').traverse_commits())
    assert len(on_b2) == 4

    # only analyze b1
    on_b1 = list(RepositoryMining(repo, only_in_branch='b1').traverse_commits())
    assert len(on_b1) == 5
Esempio n. 9
0
def test_mod_with_file_types():
    """Check which commits survive ``only_modifications_with_file_types=['.java']``."""
    java_only = ['.java']

    miner = RepositoryMining('test-repos/different_files',
                             only_modifications_with_file_types=java_only)
    found = list(miner.traverse_commits())

    assert len(found) == 2
    first, second = found
    assert first.hash == 'a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5'
    assert second.hash == 'b8c2be250786975f1c6f47e96922096f1bb25e39'

    miner = RepositoryMining('test-repos/different_files1',
                             only_modifications_with_file_types=java_only)
    found = list(miner.traverse_commits())

    assert len(found) == 2
    first, second = found
    assert first.hash == '5adbb71167e79ab6b974827e74c9da4d81977655'
    assert second.hash == '0577bec2387ee131e1ccf336adcc172224d3f6f9'
Esempio n. 10
0
def mine(_type):
    """
    Walk the hadoop test repository and sample this process's memory per commit.

    ``_type`` selects how much of each commit is touched:
    0 reads only the author name; 1 additionally reads every modification's
    diff; any other value additionally reads ``complexity`` for ``.java`` files.

    Returns a tuple ``(elapsed, samples)`` where ``elapsed`` is the
    ``datetime`` difference around the traversal and ``samples`` holds one
    memory reading per visited commit.
    """
    process = psutil.Process(os.getpid())
    window_start = datetime(2017, 1, 1)
    window_end = datetime(2017, 7, 1)
    memory_samples = []

    miner = RepositoryMining('test-repos/hadoop',
                             since=window_start,
                             to=window_end)

    started_at = datetime.now()
    for commit in miner.traverse_commits():
        # first field of memory_info(), scaled by 2**20
        # (presumably RSS bytes -> MiB; confirm against psutil)
        memory_samples.append(process.memory_info()[0] / (2 ** 20))

        # The attribute reads below are kept only for the work they trigger.
        _ = commit.author.name

        if _type != 0:
            for mod in commit.modifications:
                _ = mod.diff

                if _type != 1 and mod.filename.endswith('.java'):
                    _ = mod.complexity

    finished_at = datetime.now()

    return finished_at - started_at, memory_samples
def test_no_single_commit():
    """Check that traversing with this ``single`` hash on git-5 raises."""
    requested = "6fe83d9fbf9a63cc1c51e5fe6fd5230f7fbbce6f"

    with pytest.raises(Exception):
        miner = RepositoryMining('test-repos/git-5', single=requested)
        for found in miner.traverse_commits():
            print(found.hash)
Esempio n. 12
0
    def discard_undesired_fixing_commits(self, commits: List[str]):
        """
        Given a list of commits, discard commits that do not modify at least one Ansible file.

        A commit is kept only if at least one of its modifications has change
        type ``MODIFY`` and a ``new_path`` accepted by ``filters.is_ansible_file``.

        Note, the update occurs in-place. That is, the original list is updated.
        An empty list is left untouched.

        Parameters
        ----------
        commits : List[str]
            List of commit hash

        """
        # get a sorted list of commits in ascending order of date
        self.sort_commits(commits)

        # Nothing to filter. Without this guard, commits[0] / commits[-1]
        # below would raise IndexError on an empty list.
        if not commits:
            return

        for commit in RepositoryMining(self.path_to_repo,
                                       from_commit=commits[0],  # first commit in commits
                                       to_commit=commits[-1],  # last commit in commits
                                       only_in_branch=self.branch).traverse_commits():

            touches_ansible_file = any(
                modified_file.change_type == ModificationType.MODIFY
                and filters.is_ansible_file(modified_file.new_path)
                for modified_file in commit.modifications)

            # if none of the modified files is a Ansible file, then discard the commit
            # (the traversal also visits commits that were never in the list)
            if not touches_ansible_file and commit.hash in commits:
                commits.remove(commit.hash)
Esempio n. 13
0
    def distinct_dev_count(self, path_to_repo: str, filepath: str,
                           from_commit: str = None, to_commit: str = None):
        """
        Count the distinct author emails among commits that touched a file.

        Commits are visited with ``reversed_order=True``. When the matching
        modification is a rename, the search continues under the file's
        old path.

        :path_to_repo: path to a single repo
        :filepath: the path to the file to count for. E.g. 'doc/README.md'
        :from_commit: forwarded unchanged to ``RepositoryMining``
        :to_commit: forwarded unchanged to ``RepositoryMining``

        :return: int number of distinct developers contributing to the file
        """
        tracked_path = str(Path(filepath))
        author_emails = set()

        miner = RepositoryMining(path_to_repo, from_commit=from_commit,
                                 to_commit=to_commit,
                                 reversed_order=True)

        for commit in miner.traverse_commits():
            # Only the first modification touching the tracked path matters.
            touching = next(
                (mod for mod in commit.modifications
                 if tracked_path in (mod.new_path, mod.old_path)),
                None)
            if touching is None:
                continue

            author_emails.add(commit.author.email.strip())

            # Follow the file back to the name it had before the rename.
            if touching.change_type == ModificationType.RENAME:
                tracked_path = str(Path(touching.old_path))

        return len(author_emails)
Esempio n. 14
0
    def commits_count(self, path_to_repo: str, filepath: str,
                      from_commit: str = None, to_commit: str = None):
        """
        Count the commits that touched a file.

        Commits are visited with ``reversed_order=True``. When the matching
        modification is a rename, the search continues under the file's
        old path.

        :path_to_repo: path to a single repo
        :filepath: the path to the file to count for. E.g. 'doc/README.md'
        :from_commit: forwarded unchanged to ``RepositoryMining``
        :to_commit: forwarded unchanged to ``RepositoryMining``

        :return: int number of commits made to the file
        """
        tracked_path = str(Path(filepath))
        touching_commits = 0

        miner = RepositoryMining(path_to_repo, from_commit=from_commit,
                                 to_commit=to_commit,
                                 reversed_order=True)

        for commit in miner.traverse_commits():
            # Only the first modification touching the tracked path matters,
            # so a commit is counted at most once.
            touching = next(
                (mod for mod in commit.modifications
                 if tracked_path in (mod.new_path, mod.old_path)),
                None)
            if touching is None:
                continue

            touching_commits += 1

            # Follow the file back to the name it had before the rename.
            if touching.change_type == ModificationType.RENAME:
                tracked_path = str(Path(touching.old_path))

        return touching_commits
Esempio n. 15
0
def mine(_type):
    """
    Walk the hadoop test repository and sample this process's memory per commit.

    ``_type`` selects how much of each commit is touched:
    0 reads only the author name; any other value also reads every
    modification's ``old_path``; 2 additionally reads every modification's diff.

    Returns a tuple ``(elapsed, samples)`` where ``elapsed`` is the
    ``datetime`` difference around the traversal and ``samples`` holds one
    memory reading per visited commit.
    """
    process = psutil.Process(os.getpid())
    window_start = datetime(2015, 1, 1)
    window_end = datetime(2015, 6, 1)
    memory_samples = []

    miner = RepositoryMining('test-repos/hadoop', since=window_start,
                             to=window_end)

    started_at = datetime.now()
    for commit in miner.traverse_commits():
        # first field of memory_info(), scaled by 2**20
        # (presumably RSS bytes -> MiB; confirm against psutil)
        memory_samples.append(process.memory_info()[0] / (2**20))

        # The attribute reads below are kept only for the work they trigger.
        _ = commit.author.name

        if _type != 0:
            for mod in commit.modifications:
                _ = mod.old_path

                if _type == 2:
                    _ = mod.diff
    finished_at = datetime.now()

    return finished_at - started_at, memory_samples
Esempio n. 16
0
def test_filepath_with_to():
    """Check the commit count when combining ``filepath`` with a ``to`` date."""
    upper_bound = datetime(2018, 6, 6)
    miner = RepositoryMining(path_to_repo='test-repos/szz',
                             filepath='myfolder/A.java',
                             to=upper_bound)
    found = list(miner.traverse_commits())
    assert len(found) == 4
 def test_fixes_configuration_data():
     """Check that the selected commit is classified as fixing configuration data."""
     miner = RepositoryMining(
         path_to_repo='https://github.com/iiab/iiab/',
         only_commits=['25702f4e1d39965b54dec0e48bda18e8225e01d7'])

     for commit in miner.traverse_commits():
         classifier = AnsibleFixingCommitClassifier(commit)
         assert classifier.fixes_configuration_data()
Esempio n. 18
0
def test_no_filters():
    """Check the commits returned, in order, when no filter is given."""
    expected_hashes = ['a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
                       '375de7a8275ecdc0b28dc8de2568f47241f443e9',
                       'b8c2be250786975f1c6f47e96922096f1bb25e39']

    found = list(RepositoryMining('test-repos/git-4/').traverse_commits())

    assert len(found) == 3
    for commit, sha in zip(found, expected_hashes):
        assert commit.hash == sha
Esempio n. 19
0
def test_only_in_main_branch():
    """Check the commits returned, in order, when no branch filter is given."""
    expected_hashes = ['04b0af7b53c2a0095e98951571aa41c2e0e0dbec',
                       'e51421e0beae6a3c20bdcdfc21066e05db675e03',
                       'b197ef4f0b4bc5b7d55c8949ecb1c861731f0b9d']

    miner = RepositoryMining('test-repos/branches_not_merged')
    found = list(miner.traverse_commits())

    assert len(found) == 3
    for commit, sha in zip(found, expected_hashes):
        assert commit.hash == sha
Esempio n. 20
0
def test_mod_with_file_types_no_extension():
    """Check that filtering git-4 by the ``.py`` file type returns no commits."""
    miner = RepositoryMining('test-repos/git-4/',
                             only_modifications_with_file_types=['.py'])
    found = list(miner.traverse_commits())

    assert len(found) == 0
Esempio n. 21
0
def test_from_and_to_commit_with_merge_commit():
    """Check the commit count for a ``from_commit``/``to_commit`` range."""
    miner = RepositoryMining(
        'test-repos/pydriller',
        from_commit="015f7144641a418f6a9fae4d024286ec17fd7ce8",
        to_commit="01d2f2fbeb6980cc5568825d008017ca8ca767d6")
    found = list(miner.traverse_commits())
    assert len(found) == 3
Esempio n. 22
0
def test_filepath_with_since():
    """Check the commit count when combining ``filepath`` with a ``since`` date."""
    lower_bound = datetime(2018, 6, 6)
    miner = RepositoryMining(path_to_repo='test-repos/test5',
                             filepath='myfolder/A.java',
                             since=lower_bound)
    found = list(miner.traverse_commits())
    assert len(found) == 10
    def test_fixes_service():
        """Check that the selected commit is classified as fixing a service."""
        miner = RepositoryMining(
            path_to_repo='https://github.com/iiab/iiab/',
            only_commits=['e7872a2a9da875e47e29c4bb21771c12104cd68e'])

        for commit in miner.traverse_commits():
            classifier = AnsibleFixingCommitClassifier(commit)
            assert classifier.fixes_service()
Esempio n. 24
0
    def label(self) -> Generator[FailureProneFile, None, None]:
        """
        For each FixedFile object, yield a FailureProneFile object for each commit between the FixedFile's
        bug-introducing-commit and its fixing-commit.

        A commit is labelled for a file when its position in ``self.commit_hashes``
        is at or after the file's ``bic`` and strictly before its ``fic``.
        Nothing is yielded when either ``self.fixing_commits`` or
        ``self.fixed_files`` is empty.

        `Note:` make sure to run the method ``get_fixed_files`` before.

        Yields
        ------
        FailureProneFile
            A FailureProneFile object.

        """

        # Both inputs are required: with no fixing commits there is no
        # traversal start (self.fixing_commits[-1] would raise IndexError),
        # and with no fixed files there is nothing to label.
        if not (self.fixing_commits and self.fixed_files):
            return

        # Group the fixed files by their current path.
        labeling = dict()
        for file in self.fixed_files:
            labeling.setdefault(file.filepath, list()).append(file)

        for commit in RepositoryMining(self.path_to_repo,
                                       from_commit=self.fixing_commits[-1],
                                       to_commit=self.commit_hashes[0],
                                       order='reverse').traverse_commits():

            for files in labeling.values():
                # Iterate over a copy: entries may be removed from this very
                # list below, and removing while iterating skips the next file.
                for file in list(files):

                    idx_fic = self.commit_hashes.index(file.fic)
                    idx_bic = self.commit_hashes.index(file.bic)
                    idx_commit = self.commit_hashes.index(commit.hash)

                    if idx_fic > idx_commit >= idx_bic:
                        yield FailureProneFile(filepath=file.filepath,
                                               commit=commit.hash,
                                               fixing_commit=file.fic)

                    # The bug-introducing commit has been reached: stop tracking the file.
                    if idx_commit == idx_bic and file.filepath in labeling:
                        if file in labeling[file.filepath]:
                            labeling[file.filepath].remove(file)

            # Handle file renaming
            # NOTE(review): a rename updates file.filepath but does not move the
            # file to a new key in `labeling`; confirm this is intended.
            for modified_file in commit.modifications:
                filepath = modified_file.new_path

                for file in list(labeling.get(filepath, list())):
                    if self.commit_hashes.index(
                            file.fic) > self.commit_hashes.index(
                                commit.hash) >= self.commit_hashes.index(
                                    file.bic):

                        if modified_file.change_type == ModificationType.ADD:
                            # The file did not exist before this commit.
                            if filepath in labeling and file in labeling[
                                    filepath]:
                                labeling[filepath].remove(file)
                        elif modified_file.change_type == ModificationType.RENAME:
                            # Earlier commits know the file by its old path.
                            file.filepath = modified_file.old_path
                        break
Esempio n. 25
0
def test_between_dates():
    """Check the commits returned, in order, for the ``since``/``to`` window."""
    expected_hashes = ['a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
                       '375de7a8275ecdc0b28dc8de2568f47241f443e9']

    miner = RepositoryMining('test-repos/different_files', since=dt1, to=dt2)
    found = list(miner.traverse_commits())

    assert len(found) == 2
    for commit, sha in zip(found, expected_hashes):
        assert commit.hash == sha
def test_multiple_repos_with_tags():
    """Check the total commit count when the same tag range is mined over three repo paths."""
    repos = ['test-repos/tags'] * 3
    miner = RepositoryMining(path_to_repo=repos, from_tag='tag2',
                             to_tag='tag3')
    found = list(miner.traverse_commits())
    assert len(found) == 9
 def test_data_changed():
     """Check that ``data_changed()`` is truthy for each of the selected commits."""
     selected = ['9272b34b196d9010679157e493e775edca1daa13',
                 '25702f4e1d39965b54dec0e48bda18e8225e01d7']
     miner = RepositoryMining(path_to_repo='https://github.com/iiab/iiab/',
                              only_commits=selected)

     for commit in miner.traverse_commits():
         classifier = AnsibleFixingCommitClassifier(commit)
         assert classifier.data_changed()
 def setUpClass(cls) -> None:
     """Fetch one commit from the remote test repository and store it as ``cls.commit_obj``."""
     # Create a fake pydriller.commit.Commit for test
     miner = RepositoryMining(
         path_to_repo='https://github.com/stefanodallapalma/test-github-apis',
         only_commits=['c9ada15de53d048f4d8e74d12bea62174bc0f957'])
     fetched = list(miner.traverse_commits())
     cls.commit_obj = fetched[0]
Esempio n. 29
0
def test_between_dates():
    """Check the commits returned, in order, for the ``since``/``to`` window."""
    expected_hashes = ['a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
                       '375de7a8275ecdc0b28dc8de2568f47241f443e9']

    miner = RepositoryMining('test-repos/git-4/', since=dt1, to=dt2)
    found = list(miner.traverse_commits())

    assert 2 == len(found)
    for sha, commit in zip(expected_hashes, found):
        assert sha == commit.hash
Esempio n. 30
0
def test_should_visit_ascendent_order():
    """Check the exact order in which the commits of small_repo are visited."""
    expected_hashes = ['a88c84ddf42066611e76e6cb690144e5357d132c',
                       '6411e3096dd2070438a17b225f44475136e54e3a',
                       '09f6182cef737db02a085e1d018963c7a29bde5a',
                       '1f99848edadfffa903b8ba1286a935f1b92b2845',
                       'da39b1326dbc2edfe518b90672734a08f3c13458']

    found = list(RepositoryMining('test-repos/small_repo').traverse_commits())

    assert len(found) == 5
    for commit, sha in zip(found, expected_hashes):
        assert commit.hash == sha