def test_only_commits():
    """Check that ``only_commits`` limits traversal to the listed hashes."""
    repo = 'test-repos/complex_repo'

    # One requested hash -> exactly that commit.
    hashes = [
        c.hash for c in RepositoryMining(
            repo,
            only_commits=["9e71dd5726d775fb4a5f08506a539216e878adbb"]
        ).traverse_commits()
    ]
    assert hashes == ["9e71dd5726d775fb4a5f08506a539216e878adbb"]

    # Two requested hashes; the expected order differs from the order in
    # which the hashes are passed in.
    hashes = [
        c.hash for c in RepositoryMining(
            repo,
            only_commits=["953737b199de233896f00b4d87a0bc2794317253",
                          "ffccf1e7497eb8136fd66ed5e42bef29677c4b71"]
        ).traverse_commits()
    ]
    assert hashes == ["ffccf1e7497eb8136fd66ed5e42bef29677c4b71",
                      "953737b199de233896f00b4d87a0bc2794317253"]

    # Three requested hashes.
    hashes = [
        c.hash for c in RepositoryMining(
            repo,
            only_commits=["866e997a9e44cb4ddd9e00efe49361420aff2559",
                          "57dbd017d1a744b949e7ca0b1c1a3b3dd4c1cbc1",
                          "e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2"]
        ).traverse_commits()
    ]
    assert hashes == ["866e997a9e44cb4ddd9e00efe49361420aff2559",
                      "57dbd017d1a744b949e7ca0b1c1a3b3dd4c1cbc1",
                      "e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2"]

    # A hash that matches nothing yields no commits.
    unmatched = list(RepositoryMining(
        repo, only_commits=["fake hash"]).traverse_commits())
    assert not unmatched

    # Without the filter the whole history is visited.
    everything = list(RepositoryMining(repo).traverse_commits())
    assert len(everything) == 13
def test_multiple_filters_exceptions():
    """Check that each conflicting combination of filters raises."""
    from_commit = '6411e3096dd2070438a17b225f44475136e54e3a'
    from_tag = 'v1.4'

    # Every entry is one set of keyword filters that must be rejected.
    conflicting_filters = (
        dict(from_commit=from_commit, from_tag=from_tag),
        dict(since=dt2, from_commit=from_commit),
        dict(since=dt2, from_tag=from_tag),
        dict(to=dt2, to_tag=from_tag),
        dict(single=from_commit, to=dt2, to_tag=from_tag),
    )

    for filters in conflicting_filters:
        with pytest.raises(Exception):
            RepositoryMining('test-repos/test1/', **filters)
def test_only_in_branches():
    """Check the number of commits visited with and without ``only_in_branch``."""
    repo = 'test-repos/test8/'

    # by default, only analyze master
    on_default = list(RepositoryMining(repo).traverse_commits())
    assert len(on_default) == 3

    # only analyze b2
    on_b2 = list(RepositoryMining(repo, only_in_branch='b2').traverse_commits())
    assert len(on_b2) == 4

    # only analyze b1
    on_b1 = list(RepositoryMining(repo, only_in_branch='b1').traverse_commits())
    assert len(on_b1) == 5
def test_single_commit_head():
    """Check that ``single="HEAD"`` selects the same commit as an explicit hash."""
    repo = 'test-repos/complex_repo'

    by_hash = list(RepositoryMining(
        repo,
        single="e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2").traverse_commits())
    assert len(by_hash) == 1

    by_head = list(RepositoryMining(repo, single="HEAD").traverse_commits())
    assert len(by_head) == 1

    assert by_hash[0].hash == by_head[0].hash
def test_single_commit():
    """Check that ``single`` yields exactly the requested commit."""
    repo = 'test-repos/complex_repo'

    hashes = [
        c.hash for c in RepositoryMining(
            repo,
            single="866e997a9e44cb4ddd9e00efe49361420aff2559"
        ).traverse_commits()
    ]
    assert hashes == ["866e997a9e44cb4ddd9e00efe49361420aff2559"]

    hashes = [
        c.hash for c in RepositoryMining(
            repo,
            single="ffccf1e7497eb8136fd66ed5e42bef29677c4b71"
        ).traverse_commits()
    ]
    assert hashes == ["ffccf1e7497eb8136fd66ed5e42bef29677c4b71"]
def test_only_authors():
    """Check that ``only_authors`` keeps only commits by the named author."""
    # The author name contains a non-ASCII character ("í"). It must be
    # written as real UTF-8 text; a mis-decoded (mojibake) spelling would
    # not be the same string as the intended author name.
    lc = list(
        RepositoryMining('test-repos/git-10/',
                         only_authors=["Maurício Aniche"]).traverse_commits())
    assert len(lc) == 4

    lc = list(
        RepositoryMining('test-repos/git-10/',
                         only_authors=["ishepard"]).traverse_commits())
    assert len(lc) == 1
def test_single_commit():
    """Check that ``single`` yields exactly the requested commit (git-10 repo)."""
    repo = 'test-repos/git-10/'

    hashes = [
        c.hash for c in RepositoryMining(
            repo,
            single="4e669cb4f69245dc669e116517d80d038d8e0434"
        ).traverse_commits()
    ]
    assert hashes == ["4e669cb4f69245dc669e116517d80d038d8e0434"]

    hashes = [
        c.hash for c in RepositoryMining(
            repo,
            single="168b3aab057ed61a769acf336a4ef5e64f76c9fd"
        ).traverse_commits()
    ]
    assert hashes == ["168b3aab057ed61a769acf336a4ef5e64f76c9fd"]
def test_only_in_branches():
    """Check the number of commits visited per branch of the unmerged-branches repo."""
    repo = 'test-repos/branches_not_merged'

    # by default, only analyze master
    on_default = list(RepositoryMining(repo).traverse_commits())
    assert len(on_default) == 3

    # only analyze b2
    on_b2 = list(RepositoryMining(repo, only_in_branch='b2').traverse_commits())
    assert len(on_b2) == 4

    # only analyze b1
    on_b1 = list(RepositoryMining(repo, only_in_branch='b1').traverse_commits())
    assert len(on_b1) == 5
def test_mod_with_file_types():
    """Check ``only_modifications_with_file_types`` against two test repositories."""
    hashes = [
        c.hash for c in RepositoryMining(
            'test-repos/different_files',
            only_modifications_with_file_types=['.java']
        ).traverse_commits()
    ]
    assert hashes == ['a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
                      'b8c2be250786975f1c6f47e96922096f1bb25e39']

    hashes = [
        c.hash for c in RepositoryMining(
            'test-repos/different_files1',
            only_modifications_with_file_types=['.java']
        ).traverse_commits()
    ]
    assert hashes == ['5adbb71167e79ab6b974827e74c9da4d81977655',
                      '0577bec2387ee131e1ccf336adcc172224d3f6f9']
def mine(_type):
    """Traverse a fixed date window of the hadoop test repo while sampling memory.

    How much of each commit is touched depends on ``_type``:

    * ``0``: only the author name is read;
    * ``1``: additionally the diff of every modification is read;
    * anything else: additionally the complexity of every ``.java``
      modification is read.

    The values read are discarded; the accesses themselves are the workload.

    :param _type: selector for how much work is done per commit
    :return: tuple ``(elapsed, samples)`` where ``elapsed`` is the
        ``timedelta`` spent in the traversal and ``samples`` holds one
        memory reading per visited commit
    """
    process = psutil.Process(os.getpid())
    window_start = datetime(2017, 1, 1)
    window_end = datetime(2017, 7, 1)
    samples = []

    started = datetime.now()
    for commit in RepositoryMining('test-repos/hadoop',
                                   since=window_start,
                                   to=window_end).traverse_commits():
        # First field of memory_info(), scaled down by 2**20
        # (presumably resident memory in MiB; confirm against psutil docs).
        samples.append(process.memory_info()[0] / (2 ** 20))

        _ = commit.author.name
        if _type == 0:
            continue

        for mod in commit.modifications:
            _ = mod.diff
            if _type == 1:
                continue
            if mod.filename.endswith('.java'):
                _ = mod.complexity
    finished = datetime.now()

    return finished - started, samples
def test_no_single_commit():
    """Check that asking for this ``single`` commit in git-5 raises."""
    missing = "6fe83d9fbf9a63cc1c51e5fe6fd5230f7fbbce6f"

    # Construction and traversal both stay inside the context manager so an
    # error from either one satisfies the expectation.
    with pytest.raises(Exception):
        for found in RepositoryMining('test-repos/git-5',
                                      single=missing).traverse_commits():
            print(found.hash)
def discard_undesired_fixing_commits(self, commits: List[str]):
    """
    Given a list of commits, discard commits that do not modify at least
    one Ansible file.

    Note, the update occurs in-place. That is, the original list is updated.
    An empty list is left untouched.

    Parameters
    ----------
    commits : List[str]
        List of commit hash
    """
    # Nothing to filter. This also avoids indexing commits[0] / commits[-1]
    # on an empty list below.
    if not commits:
        return

    # Order the commits by ascending date; the return value is not used, so
    # sort_commits is expected to reorder the list in place.
    self.sort_commits(commits)

    for commit in RepositoryMining(self.path_to_repo,
                                   from_commit=commits[0],  # first commit in commits
                                   to_commit=commits[-1],  # last commit in commits
                                   only_in_branch=self.branch).traverse_commits():

        # A commit is kept only if at least one of its files was MODIFIED
        # and that file is an Ansible file.
        modifies_ansible_file = any(
            modified_file.change_type == ModificationType.MODIFY
            and filters.is_ansible_file(modified_file.new_path)
            for modified_file in commit.modifications)

        # The traversal can visit commits that are not in the list at all,
        # so check membership before removing.
        if not modifies_ansible_file and commit.hash in commits:
            commits.remove(commit.hash)
def distinct_dev_count(self, path_to_repo: str, filepath: str, from_commit: str = None, to_commit: str = None):
    """
    Return the number of distinct developers that contributed to a file
    within the traversed commit range.

    Developers are identified by their whitespace-stripped author email.
    When a commit renames the file, the file is followed under its previous
    path for the remaining commits of the traversal.

    :path_to_repo: path to a single repo
    :filepath: the path to the file to count for. E.g. 'doc/README.md'
    :from_commit: passed to RepositoryMining as ``from_commit``
    :to_commit: passed to RepositoryMining as ``to_commit``

    :return: int number of distinct developers contributing to the file
    """
    tracked_path = str(Path(filepath))
    emails = set()

    miner = RepositoryMining(path_to_repo,
                             from_commit=from_commit,
                             to_commit=to_commit,
                             reversed_order=True)

    for commit in miner.traverse_commits():
        for change in commit.modifications:
            if tracked_path not in (change.new_path, change.old_path):
                continue

            emails.add(commit.author.email.strip())

            # Keep following the file under the name it had before the rename.
            if change.change_type == ModificationType.RENAME:
                tracked_path = str(Path(change.old_path))

            # Only the first matching modification of a commit is considered.
            break

    return len(emails)
def commits_count(self, path_to_repo: str, filepath: str, from_commit: str = None, to_commit: str = None):
    """
    Return the number of commits within the traversed range that touched
    a file.

    A commit is counted at most once. When a commit renames the file, the
    file is followed under its previous path for the remaining commits of
    the traversal.

    :path_to_repo: path to a single repo
    :filepath: the path to the file to count for. E.g. 'doc/README.md'
    :from_commit: passed to RepositoryMining as ``from_commit``
    :to_commit: passed to RepositoryMining as ``to_commit``

    :return: int number of commits made to the file
    """
    tracked_path = str(Path(filepath))
    touched = 0

    miner = RepositoryMining(path_to_repo,
                             from_commit=from_commit,
                             to_commit=to_commit,
                             reversed_order=True)

    for commit in miner.traverse_commits():
        for change in commit.modifications:
            if tracked_path not in (change.new_path, change.old_path):
                continue

            touched += 1

            # Keep following the file under the name it had before the rename.
            if change.change_type == ModificationType.RENAME:
                tracked_path = str(Path(change.old_path))

            # Only the first matching modification of a commit is counted.
            break

    return touched
def mine(_type):
    """Traverse a fixed date window of the hadoop test repo while sampling memory.

    How much of each commit is touched depends on ``_type``:

    * ``0``: only the author name is read;
    * ``2``: the old path and the diff of every modification are read;
    * anything else: only the old path of every modification is read.

    The values read are discarded; the accesses themselves are the workload.

    :param _type: selector for how much work is done per commit
    :return: tuple ``(elapsed, samples)`` where ``elapsed`` is the
        ``timedelta`` spent in the traversal and ``samples`` holds one
        memory reading per visited commit
    """
    process = psutil.Process(os.getpid())
    window_start = datetime(2015, 1, 1)
    window_end = datetime(2015, 6, 1)
    samples = []

    started = datetime.now()
    for commit in RepositoryMining('test-repos/hadoop',
                                   since=window_start,
                                   to=window_end).traverse_commits():
        # First field of memory_info(), scaled down by 2**20
        # (presumably resident memory in MiB; confirm against psutil docs).
        samples.append(process.memory_info()[0] / (2**20))

        _ = commit.author.name
        if _type == 0:
            continue

        for mod in commit.modifications:
            _ = mod.old_path
            if _type == 2:
                _ = mod.diff
    finished = datetime.now()

    return finished - started, samples
def test_filepath_with_to():
    """Check the ``filepath`` filter combined with an upper date bound."""
    upper_bound = datetime(2018, 6, 6)
    visited = list(RepositoryMining(path_to_repo='test-repos/szz',
                                    filepath='myfolder/A.java',
                                    to=upper_bound).traverse_commits())
    assert len(visited) == 4
def test_fixes_configuration_data():
    """Check that the selected commit is classified as fixing configuration data."""
    miner = RepositoryMining(
        path_to_repo='https://github.com/iiab/iiab/',
        only_commits=['25702f4e1d39965b54dec0e48bda18e8225e01d7'])

    for commit in miner.traverse_commits():
        classifier = AnsibleFixingCommitClassifier(commit)
        assert classifier.fixes_configuration_data()
def test_no_filters():
    """Check the commits visited, in order, when no filter is given."""
    hashes = [c.hash
              for c in RepositoryMining('test-repos/git-4/').traverse_commits()]
    assert hashes == ['a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
                      '375de7a8275ecdc0b28dc8de2568f47241f443e9',
                      'b8c2be250786975f1c6f47e96922096f1bb25e39']
def test_only_in_main_branch():
    """Check the commits visited, in order, when no branch is specified."""
    hashes = [
        c.hash
        for c in RepositoryMining('test-repos/branches_not_merged').traverse_commits()
    ]
    assert hashes == ['04b0af7b53c2a0095e98951571aa41c2e0e0dbec',
                      'e51421e0beae6a3c20bdcdfc21066e05db675e03',
                      'b197ef4f0b4bc5b7d55c8949ecb1c861731f0b9d']
def test_mod_with_file_types_no_extension():
    """Check that filtering git-4 on ``.py`` modifications yields no commits."""
    miner = RepositoryMining('test-repos/git-4/',
                             only_modifications_with_file_types=['.py'])
    matching = list(miner.traverse_commits())
    assert not matching
def test_from_and_to_commit_with_merge_commit():
    """Check the number of commits visited between two given commits of pydriller."""
    miner = RepositoryMining(
        'test-repos/pydriller',
        from_commit="015f7144641a418f6a9fae4d024286ec17fd7ce8",
        to_commit="01d2f2fbeb6980cc5568825d008017ca8ca767d6")
    visited = list(miner.traverse_commits())
    assert len(visited) == 3
def test_filepath_with_since():
    """Check the ``filepath`` filter combined with a lower date bound."""
    lower_bound = datetime(2018, 6, 6)
    visited = list(RepositoryMining(path_to_repo='test-repos/test5',
                                    filepath='myfolder/A.java',
                                    since=lower_bound).traverse_commits())
    assert len(visited) == 10
def test_fixes_service():
    """Check that the selected commit is classified as fixing a service."""
    miner = RepositoryMining(
        path_to_repo='https://github.com/iiab/iiab/',
        only_commits=['e7872a2a9da875e47e29c4bb21771c12104cd68e'])

    for commit in miner.traverse_commits():
        classifier = AnsibleFixingCommitClassifier(commit)
        assert classifier.fixes_service()
def label(self) -> Generator[FailureProneFile, None, None]:
    """
    For each FixedFile object, yield a FailureProneFile object for each
    commit between the FixedFile's bug-introducing-commit and its
    fixing-commit.

    `Note:` make sure to run the method ``get_fixed_files`` before.

    Nothing is yielded when either ``self.fixing_commits`` or
    ``self.fixed_files`` is empty.

    Yields
    ------
    FailureProneFile
        A FailureProneFile object.
    """
    # Both collections are needed: fixing_commits[-1] is the traversal start
    # and fixed_files is what gets labelled. If either is empty there is
    # nothing to do (and indexing an empty fixing_commits would fail).
    if not (self.fixing_commits and self.fixed_files):
        return

    # Group the fixed files by the path they were registered with.
    labeling = dict()
    for file in self.fixed_files:
        labeling.setdefault(file.filepath, list()).append(file)

    for commit in RepositoryMining(self.path_to_repo,
                                   from_commit=self.fixing_commits[-1],
                                   to_commit=self.commit_hashes[0],
                                   order='reverse').traverse_commits():

        for files in labeling.values():
            # Iterate over a snapshot: entries may be removed from the
            # underlying list below, and removing from a list while
            # iterating it would skip the following element.
            for file in list(files):

                idx_fic = self.commit_hashes.index(file.fic)
                idx_bic = self.commit_hashes.index(file.bic)
                idx_commit = self.commit_hashes.index(commit.hash)

                # The commit lies in [bic, fic) by position in commit_hashes.
                if idx_fic > idx_commit >= idx_bic:
                    yield FailureProneFile(filepath=file.filepath,
                                           commit=commit.hash,
                                           fixing_commit=file.fic)

                # The bug-introducing commit has been reached: this file
                # needs no further labelling.
                if idx_commit == idx_bic and file.filepath in labeling:
                    if file in labeling[file.filepath]:
                        labeling[file.filepath].remove(file)

        # Handle file renaming
        for modified_file in commit.modifications:
            filepath = modified_file.new_path

            for file in list(labeling.get(filepath, list())):
                if self.commit_hashes.index(
                        file.fic) > self.commit_hashes.index(
                        commit.hash) >= self.commit_hashes.index(
                        file.bic):

                    if modified_file.change_type == ModificationType.ADD:
                        # The file was added by this commit: stop tracking it.
                        if filepath in labeling and file in labeling[
                                filepath]:
                            labeling[filepath].remove(file)
                    elif modified_file.change_type == ModificationType.RENAME:
                        # Report the file under its previous path from now on.
                        file.filepath = modified_file.old_path

                    # Only the first in-range file per modification is handled.
                    break
def test_between_dates():
    """Check the commits visited, in order, between the two module-level dates."""
    hashes = [
        c.hash for c in RepositoryMining('test-repos/different_files',
                                         since=dt1,
                                         to=dt2).traverse_commits()
    ]
    assert hashes == ['a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
                      '375de7a8275ecdc0b28dc8de2568f47241f443e9']
def test_multiple_repos_with_tags():
    """Check tag-bounded traversal when the same repository is listed three times."""
    lower_tag = 'tag2'
    upper_tag = 'tag3'
    repo_paths = ['test-repos/tags'] * 3

    miner = RepositoryMining(path_to_repo=repo_paths,
                             from_tag=lower_tag,
                             to_tag=upper_tag)
    visited = list(miner.traverse_commits())
    assert len(visited) == 9
def test_data_changed():
    """Check that every selected commit is classified as changing data."""
    selected = ['9272b34b196d9010679157e493e775edca1daa13',
                '25702f4e1d39965b54dec0e48bda18e8225e01d7']
    miner = RepositoryMining(path_to_repo='https://github.com/iiab/iiab/',
                             only_commits=selected)

    for commit in miner.traverse_commits():
        classifier = AnsibleFixingCommitClassifier(commit)
        assert classifier.data_changed()
def setUpClass(cls) -> None:
    """Fetch one commit from the remote test repository and store it on the class.

    The commit is stored as ``cls.commit_obj`` for use by the tests.
    """
    miner = RepositoryMining(
        path_to_repo='https://github.com/stefanodallapalma/test-github-apis',
        only_commits=['c9ada15de53d048f4d8e74d12bea62174bc0f957'])

    # Materialise the traversal, then keep the first commit.
    fetched = list(miner.traverse_commits())
    cls.commit_obj = fetched[0]
def test_between_dates():
    """Check the commits visited in git-4, in order, between the two module-level dates."""
    hashes = [
        c.hash for c in RepositoryMining('test-repos/git-4/',
                                         since=dt1,
                                         to=dt2).traverse_commits()
    ]
    assert hashes == ['a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
                      '375de7a8275ecdc0b28dc8de2568f47241f443e9']
def test_should_visit_ascendent_order():
    """Check the exact sequence of commits visited in small_repo by default."""
    expected_order = [
        'a88c84ddf42066611e76e6cb690144e5357d132c',
        '6411e3096dd2070438a17b225f44475136e54e3a',
        '09f6182cef737db02a085e1d018963c7a29bde5a',
        '1f99848edadfffa903b8ba1286a935f1b92b2845',
        'da39b1326dbc2edfe518b90672734a08f3c13458',
    ]
    visited = [c.hash
               for c in RepositoryMining('test-repos/small_repo').traverse_commits()]
    assert visited == expected_order