Example #1
    def __init__(self,
                 path_to_repo: str,
                 since: datetime = None,
                 to: datetime = None,
                 from_commit: str = None,
                 to_commit: str = None):
        """
        :param str path_to_repo: path to a single repo

        :param datetime since: starting date

        :param datetime to: ending date

        :param str from_commit: starting commit (only if `since` is None)

        :param str to_commit: ending commit (only if `to` is None)
        """

        if not since and not from_commit:
            raise TypeError('You must pass one between since and from_commit')

        if not to and not to_commit:
            raise TypeError('You must pass one between to and to_commit')

        if from_commit and to_commit and from_commit == to_commit:  # Use 'single' param to avoid Warning
            self.repo_miner = Repository(path_to_repo, single=from_commit)

        else:
            self.repo_miner = Repository(path_to_repo=path_to_repo,
                                         since=since,
                                         to=to,
                                         from_commit=from_commit,
                                         to_commit=to_commit,
                                         order='reverse')
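
The enclosing class is not shown in the snippet above; a minimal usage sketch, assuming a hypothetical CommitMiner class that carries this __init__ and exposes repo_miner:

from datetime import datetime

# CommitMiner is a hypothetical name for the class whose __init__ is shown above.
miner = CommitMiner('path/to/repo',
                    since=datetime(2020, 1, 1),
                    to=datetime(2021, 1, 1))
# order='reverse' in the constructor yields the newest commits first.
for commit in miner.repo_miner.traverse_commits():
    print(commit.hash, commit.msg)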
Example #2
def test_badly_formatted_url():
    with pytest.raises(Exception):
        list(
            Repository(path_to_repo='https://github.com/ishepard.git/test').
            traverse_commits())

    with pytest.raises(Exception):
        list(Repository(path_to_repo='test').traverse_commits())
Example #3
def test_ignore_add_whitespaces():
    commit = list(
        Repository('test-repos/whitespace',
                   single="338a74ceae164784e216555d930210371279ba8e").
        traverse_commits())[0]
    assert len(commit.modified_files) == 1
    commit = list(
        Repository('test-repos/whitespace',
                   skip_whitespaces=True,
                   single="338a74ceae164784e216555d930210371279ba8e").
        traverse_commits())[0]
    assert len(commit.modified_files) == 0
Example #4
def test_ignore_deleted_whitespaces():
    commit = list(
        Repository('test-repos/whitespace',
                   single="e6e429f6b485e18fb856019d9953370fd5420b20").
        traverse_commits())[0]
    assert len(commit.modified_files) == 1
    commit = list(
        Repository('test-repos/whitespace',
                   skip_whitespaces=True,
                   single="e6e429f6b485e18fb856019d9953370fd5420b20").
        traverse_commits())[0]
    assert len(commit.modified_files) == 0
Example #5
def test_ignore_add_whitespaces_and_changed_file():
    commit = list(
        Repository('test-repos/whitespace',
                   single="532068e9d64b8a86e07eea93de3a57bf9e5b4ae0").
        traverse_commits())[0]
    assert len(commit.modified_files) == 2
    commit = list(
        Repository('test-repos/whitespace',
                   skip_whitespaces=True,
                   single="532068e9d64b8a86e07eea93de3a57bf9e5b4ae0").
        traverse_commits())[0]
    assert len(commit.modified_files) == 1
Example #6
def test_clone_repo_to_repeated():
    import tempfile
    tmp_path = tempfile.gettempdir()
    dt2 = datetime(2018, 10, 20)
    url = "https://github.com/ishepard/pydriller.git"
    assert len(
        list(
            Repository(path_to_repo=url, to=dt2,
                       clone_repo_to=str(tmp_path)).traverse_commits())) == 159
    assert os.path.isdir(os.path.join(tmp_path, "pydriller"))
    assert len(
        list(
            Repository(path_to_repo=url, to=dt2,
                       clone_repo_to=str(tmp_path)).traverse_commits())) == 159
    assert os.path.isdir(os.path.join(tmp_path, "pydriller"))
Example #7
    def _exclude_commits_by_change_size(self,
                                        commit_hash: str,
                                        max_change_size: int = 20) -> Set[str]:
        to_exclude = set()
        repo_mining = Repository(self.repository_path,
                                 to_commit=commit_hash,
                                 order='reverse').traverse_commits()
        for commit in repo_mining:
            try:
                if len(commit.modified_files) > max_change_size:
                    to_exclude.add(commit.hash)
                else:
                    break
            except Exception as e:
                log.error(
                    f'unable to analyze commit: {self.repository_path} {commit.hash}'
                )
                raise e

        if len(to_exclude) > 0:
            log.info(
                f'count of commits excluded by change size > {max_change_size}: {len(to_exclude)}'
            )

        return to_exclude
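
Because the traversal runs in reverse order and breaks at the first commit whose change size is within the limit, only the consecutive run of oversized commits ending at commit_hash is excluded. A minimal call sketch, assuming miner is an instance of the (unshown) enclosing class:

# miner is a hypothetical instance of the class the method above belongs to.
excluded = miner._exclude_commits_by_change_size('<commit hash>',
                                                 max_change_size=20)
print(f'{len(excluded)} commits excluded')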
Example #8
def git_log_to_json(
        pathRepository=r"C:\Users\login\work\buggyChangeLocater\projects\egit\repositoryMethod",
        commitFrom="b459d7381ea57e435bd9b71eb37a4cb4160e252b"):
    commits = []
    for i, commit in enumerate(
            Repository(pathRepository,
                       to_commit=commitFrom).traverse_commits()):
        comment = commit.msg.replace("\r\n", " ").replace("\n", " ").replace(
            "\r", " ").replace("\t", " ")
        comment = comment + " Modified : None"
        for modified_file in commit.modified_files:
            if (modified_file.change_type == ModificationType.ADD):
                comment = comment + " Added : " + modified_file.new_path
            elif (modified_file.change_type == ModificationType.DELETE):
                comment = comment + " Deleted : " + modified_file.old_path
            else:
                comment = comment + " Modified : " + modified_file.new_path
        row = {}
        row["id"] = i  #commit.hash
        row["author"] = commit.author.name
        row["date"] = str(
            int(commit.committer_date.timestamp())
        ) + "000"  #.strftime("%Y-%m-%d %H:%M:%S %z")#2020-07-24     23:52:45 +0200
        row["comment"] = comment
        commits.append(row)
    with open('commits.csv', 'w', encoding="utf-8", newline="") as f:
        writer = csv.writer(f, delimiter='\t')
        count = 0
        for commit in commits:
            count += 1
            writer.writerow([
                commit["id"], commit["date"], commit["author"],
                commit["comment"]
            ])
Example #9
def pydriller_szz(git_path, bugsfixes_json, results_path):
    g = Git(git_path)
    with open(bugsfixes_json) as f:
        commits = json.loads(f.read())

    bic = {}
    for a in commits:
        bic[a['fix_commit_hash']] = {}
        c = next(
            Repository(git_path,
                       single=a['fix_commit_hash']).traverse_commits())
        for f in c.modified_files:
            if f.new_path is None:
                continue
            if '\\test\\' in f.new_path or not f.new_path.endswith('.java'):
                continue
            ans = g.get_commits_last_modified_lines(c, f)
            for f_name in ans:
                bic[a['fix_commit_hash']][f_name] = list(ans[f_name])

    with open(results_path + ".json", 'w') as out:
        json.dump(bic, out)

    as_csv = []
    for bugfix_commit in bic:
        for f_name in bic[bugfix_commit]:
            for bic_commit in bic[bugfix_commit][f_name]:
                as_csv.append([bugfix_commit, f_name, bic_commit])
    df = pd.DataFrame(as_csv, columns=['bugfix_commit', 'filename', 'bic'])
    df.to_csv(results_path + ".csv", index=False)
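
The layout of bugsfixes_json is not shown above; judging from the lookups, it appears to be a JSON list of records with a fix_commit_hash field. A minimal sketch of such an input file (file name and hash are placeholders):

import json

# Assumed input format for pydriller_szz: one record per bug-fixing commit.
sample = [{"fix_commit_hash": "<40-character commit sha>"}]
with open("bugsfixes.json", "w") as f:
    json.dump(sample, f, indent=2)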
Example #10
def mine(_type):
    p = psutil.Process(os.getpid())
    dt1 = datetime(2017, 1, 1)
    dt2 = datetime(2017, 7, 1)
    all_commits = []

    start = datetime.now()
    for commit in Repository('test-repos-hadoop/hadoop',
                             since=dt1,
                             to=dt2).traverse_commits():
        memory = p.memory_info()[0] / (2 ** 20)
        all_commits.append(memory)

        h = commit.author.name  # noqa

        if _type == 0:
            continue

        for mod in commit.modified_files:
            dd = mod.diff  # noqa

            if _type == 1:
                continue

            if mod.filename.endswith('.java'):
                cc = mod.complexity  # noqa

    end = datetime.now()

    diff = end - start

    return diff, all_commits
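
A minimal driver sketch comparing the three traversal depths measured by mine; it assumes the 'test-repos-hadoop/hadoop' checkout hardcoded above exists locally:

# _type=0: commit metadata only; _type=1: also read diffs;
# any other value: also compute complexity for .java files.
for depth in (0, 1, 2):
    elapsed, memory_samples = mine(depth)
    peak = max(memory_samples) if memory_samples else 0
    print(f'depth={depth} time={elapsed} peak_memory={peak:.1f} MiB')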
Example #11
def test_projectname_multiple_repos():
    repos = [
        'test-repos/files_in_directories', 'test-repos/files_in_directories',
        'test-repos/files_in_directories'
    ]
    for commit in Repository(path_to_repo=repos).traverse_commits():
        assert commit.project_name == 'files_in_directories'
Example #12
def test_diff_with_histogram(git_repo):
    # with histogram
    commit = list(
        Repository('test-repos/histogram',
                   single="93df8676e6fab70d9677e94fd0f6b17db095e890",
                   histogram_diff=True).traverse_commits())[0]
    diff = commit.modified_files[0].diff_parsed
    assert (4, '    {') in diff["added"]
    assert (5, '        log.error("Icon path is null");') in diff["added"]
    assert (6, '        return null;') in diff["added"]
    assert (7, '    }') in diff["added"]
    assert (8, '') in diff["added"]
    assert (11, '    if (imgURL == null)') in diff["added"]
    assert (12, '    {') in diff["added"]
    assert (13, '        log.error("Couldn\'t find icon: " + imgURL);'
            ) in diff["added"]
    assert (14, '        return null;') in diff["added"]
    assert (17, '        return new ImageIcon(imgURL);') in diff["added"]

    assert (6, '    {') in diff["deleted"]
    assert (7, '        return new ImageIcon(imgURL);') in diff["deleted"]
    assert (10, '    {') in diff["deleted"]
    assert (11, '        log.error("Couldn\'t find icon: " + imgURL);'
            ) in diff["deleted"]
    assert (12, '    }') in diff["deleted"]
    assert (13, '    return null;') in diff["deleted"]
Example #13
def test_diff_without_histogram(git_repo):
    # without histogram
    commit = list(
        Repository('test-repos/histogram',
                   single="93df8676e6fab70d9677e94fd0f6b17db095e890").
        traverse_commits())[0]

    diff = commit.modified_files[0].diff_parsed
    assert len(diff['added']) == 11
    assert (3, '    if (path == null)') in diff['added']
    assert (5, '        log.error("Icon path is null");') in diff['added']
    assert (6, '        return null;') in diff['added']
    assert (8, '') in diff['added']
    assert (9, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'
            ) in diff['added']
    assert (10, '') in diff['added']
    assert (11, '    if (imgURL == null)') in diff['added']
    assert (12, '    {') in diff['added']
    assert (14, '        return null;') in diff['added']
    assert (16, '    else') in diff['added']
    assert (17, '        return new ImageIcon(imgURL);') in diff['added']

    assert len(diff['deleted']) == 7
    assert (3, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'
            ) in diff['deleted']
    assert (4, '') in diff['deleted']
    assert (5, '    if (imgURL != null)') in diff['deleted']
    assert (7, '        return new ImageIcon(imgURL);') in diff['deleted']
    assert (9, '    else') in diff['deleted']
    assert (10, '    {') in diff['deleted']
    assert (13, '    return null;') in diff['deleted']
Example #14
def test_clone_repo_to(tmp_path):
    dt2 = datetime(2018, 10, 20)
    url = "https://github.com/ishepard/pydriller.git"
    assert len(
        list(
            Repository(path_to_repo=url, to=dt2,
                       clone_repo_to=str(tmp_path)).traverse_commits())) == 159
    assert tmp_path.exists() is True
Example #15
    def get_impacted_files(
            self,
            fix_commit_hash: str,
            file_ext_to_parse: List[str] = None,
            only_deleted_lines: bool = True) -> List['ImpactedFile']:
        """
         Parse the diff of given fix commit using PyDriller to obtain a list of ImpactedFile with
         impacted file path and modified line ranges. As default behaviour, all deleted lines in the diff which
         are also added are treated as modified lines.

        :param List[str] file_ext_to_parse: parse only the given file extensions
        :param only_deleted_lines: considers as modified lines only the line numbers that are deleted and added.
            By default, only deleted lines are considered
        :param str fix_commit_hash: hash of fix commit to parse
        :returns List[ImpactedFile] impacted_files
        """
        impacted_files = list()

        # fix_commit = PyDrillerGitRepo(self.repository_path).get_commit(fix_commit_hash)
        fix_commit = next(
            Repository(self.repository_path,
                       single=fix_commit_hash).traverse_commits())
        for mod in fix_commit.modified_files:
            # skip newly added files
            if not mod.old_path:
                continue

            # filter files by extension
            if file_ext_to_parse:
                ext = mod.filename.rsplit('.', 1)
                if len(ext) < 2 or ext[1] not in file_ext_to_parse:
                    log.info(f"skip file: {mod.filename}")
                    continue

            file_path = mod.new_path
            if mod.change_type == ModificationType.DELETE or mod.change_type == ModificationType.RENAME:
                file_path = mod.old_path

            lines_added = [added[0] for added in mod.diff_parsed['added']]
            lines_deleted = [
                deleted[0] for deleted in mod.diff_parsed['deleted']
            ]

            if only_deleted_lines:
                mod_lines = lines_deleted
            else:
                mod_lines = [ld for ld in lines_deleted if ld in lines_added]

            if len(mod_lines) > self.max_file_modifications:
                log.warning("File changes too large")
                continue
            if len(mod_lines) > 0:
                impacted_files.append(ImpactedFile(file_path, mod_lines))

        log.info([str(f) for f in impacted_files])

        return impacted_files
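
A minimal call sketch, assuming szz is an instance of the (unshown) enclosing class and that ImpactedFile has a readable string form:

# szz is a hypothetical instance of the class the method above belongs to.
impacted = szz.get_impacted_files('<fix commit hash>',
                                  file_ext_to_parse=['java'])
for impacted_file in impacted:
    print(impacted_file)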
Example #16
def test_ignore_add_whitespaces_and_modified_normal_line(git_repo):
    commit = list(
        Repository('test-repos/whitespace',
                   single="52716ef1f11e07308b5df1b313aec5496d5e91ce").
        traverse_commits())[0]
    assert len(commit.modified_files) == 1
    parsed_normal_diff = commit.modified_files[0].diff_parsed
    commit = list(
        Repository('test-repos/whitespace',
                   skip_whitespaces=True,
                   single="52716ef1f11e07308b5df1b313aec5496d5e91ce").
        traverse_commits())[0]
    assert len(commit.modified_files) == 1
    parsed_wo_whitespaces_diff = commit.modified_files[0].diff_parsed
    assert len(parsed_normal_diff['added']) == 2
    assert len(parsed_wo_whitespaces_diff['added']) == 1

    assert len(parsed_normal_diff['deleted']) == 1
    assert len(parsed_wo_whitespaces_diff['deleted']) == 0
Example #17
    def mine(self, **kwargs: Any) -> None:
        """Gather data from repository. To be extended in subclasses."""
        miner = Repository(self.repo, **kwargs)

        for commit in miner.traverse_commits():
            try:
                self.mine_commit(commit)
            except GitCommandError as err:
                # Warn about failing git commands, but continue
                warnings.warn(str(err))
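
mine_commit is not shown above; a minimal sketch of how a subclass might supply it (the RepoMiner base class name, its attributes, and the branch name are assumptions):

class CommitCounter(RepoMiner):  # hypothetical base class containing mine() above
    def __init__(self, repo):
        self.repo = repo  # mine() reads self.repo
        self.count = 0

    def mine_commit(self, commit):
        # Minimal per-commit handler: just count commits.
        self.count += 1

counter = CommitCounter('path/to/repo')
counter.mine(only_in_branch='master')  # kwargs are forwarded to Repository
print(counter.count)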
Example #18
def test_deletion_remotes():
    repos = [
        'https://github.com/ishepard/pydriller',
        'https://github.com/ishepard/pydriller'
    ]
    paths = set()
    for commit in Repository(path_to_repo=repos).traverse_commits():
        paths.add(commit.project_path)

    for path in paths:
        assert os.path.exists(path) is False
Example #19
    def visualizar_parents(self):

        rm = Repository(
            "C:\\Users\\Leandro César\\Documents\\Nova pasta\\junit4")

        for commit in rm.traverse_commits():
            if len(commit.parents) == 2:
                # Merge commit: two parents.
                print(commit.parents)
            else:
                print(commit.parents)
Example #20
    def visualizar_commits(self):
        for commit in Repository(
                "C:\\Users\\Leandro César\\Documents\\Nova pasta\\junit4"
        ).traverse_commits():
            print('Commit {} was modified by author {}, '
                  'and committed by {} on {}'.format(
                      commit.hash, commit.author.name, commit.committer.name,
                      commit.committer_date))

        botao = input("Press any key to exit")
        print("\x1b[2J\x1b[1;1H")
Example #21
    def get_merge_commits(self, commit_hash: str) -> Set[str]:
        merge = set()
        repo_mining = Repository(single=commit_hash, path_to_repo=self.repository_path).traverse_commits()
        for commit in repo_mining:
            try:
                if commit.merge:
                    merge.add(commit.hash)
            except Exception as e:
                log.error(f'unable to analyze commit: {self.repository_path} {commit.hash}')

        if len(merge) > 0:
            log.info(f'merge commits count: {len(merge)}')

        return merge
Example #22
def determine_commits_in_period(repository, start_date, end_date):
    """Determine the commits that are made in the period from start_date to end_date."""

    production_code_commit_count = 0
    test_code_commit_count = 0
    number_of_commits = 0
    for commit in Repository(repository, since=start_date,
                             to=end_date).traverse_commits():
        if commit_contains_test_code(commit):
            test_code_commit_count = test_code_commit_count + 1
        if commit_contains_production_code(commit):
            production_code_commit_count = production_code_commit_count + 1
        number_of_commits = number_of_commits + 1
    return number_of_commits, production_code_commit_count, test_code_commit_count
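
A minimal call sketch; commit_contains_test_code and commit_contains_production_code are helpers not shown here, and the repository path and dates are placeholders:

from datetime import datetime

total, production, test = determine_commits_in_period(
    'path/to/repo', datetime(2021, 1, 1), datetime(2021, 12, 31))
print(f'{total} commits: {production} touched production code, {test} touched tests')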
Example #23
    def dictionaryWithAllCommmits(self):
        dictionaryAux = {}
        for commit in Repository(self.repository).traverse_commits():
            commitAuthorNameFormatted = '{}'.format(commit.author.name)
            commitAuthorDateFormatted = '{}'.format(commit.author_date)
            listFilesModifiedInCommit = []
            for modification in commit.modified_files:
                itemModified = '{}'.format(modification.filename)
                listFilesModifiedInCommit.append(itemModified)
            dictionaryAux[commit.hash] = [
                commitAuthorNameFormatted, commitAuthorDateFormatted,
                listFilesModifiedInCommit
            ]
        return dictionaryAux
Example #24
    def save_commits_and_authors_in_json(self, user_id):
        list_of_commits = list()
        list_of_authors = list()
        for commit in Repository(self.repository).traverse_commits():
            list_of_commits.append(commit.hash)
            list_of_authors.append(commit.author.name)
        authors = list(set(list_of_authors))
        dict_commits = {}
        dict_commits[self.name] = list_of_commits
        Util.save_dictionary_in_json_file(self.name, user_id, dict_commits,
                                          'commits')
        dict_authors = {}
        dict_authors[self.name] = authors
        Util.save_dictionary_in_json_file(self.name, user_id, dict_authors,
                                          'authors')
Example #25
    def visualizar_arquivo_especifico(self):

        caminho = "C:\\Users\\Leandro César\\Documents\\Nova pasta\\junit4"
        caminho_do_arquivo = input(
            "Paste the path starting from the src directory: "
        )  # e.g. "\\src\\test\\java\\junit\\tests\\AllTests.java"

        for commit in Repository(caminho,
                                 filepath=caminho +
                                 caminho_do_arquivo).traverse_commits():
            print(commit.hash)

        botao = input("Press any key to exit")

        print("\x1b[2J\x1b[1;1H")
Example #26
    def get_meta_changes(self, commit_hash: str, current_file: str) -> Set[str]:
        meta_changes = set()
        repo_mining = Repository(path_to_repo=self.repository_path, single=commit_hash).traverse_commits()
        for commit in repo_mining:
            show_str = self.repository.git.show(commit.hash, '--summary').splitlines()
            if show_str and self._is_git_mode_change(show_str, current_file):
                log.info(f'exclude meta-change (file mode change): {current_file} {commit.hash}')
                meta_changes.add(commit.hash)
            else:
                try:
                    for m in commit.modified_files:
                        if (current_file == m.new_path or current_file == m.old_path) and (m.change_type in self.change_types_to_ignore):
                            log.info(f'exclude meta-change ({m.change_type}): {current_file} {commit.hash}')
                            meta_changes.add(commit.hash)
                except Exception as e:
                    log.error(f'unable to analyze commit: {self.repository_path} {commit.hash}')

        return meta_changes
Example #27
def get_java_method_metrics(files, project_path, version):
    # Get the file names of all the Java files under test
    file_names = get_all_name_of_files(files)
    num_of_add_method = 0
    num_of_sub_method = 0
    num_of_modify_method = 0

    def is_in(name, names):
        for index in range(0, len(names)):
            if name in names[index]:
                return index
        return -1

    for commit in Repository(project_path, single=version).traverse_commits():
        for m in commit.modified_files:
            # Check whether the committed file's name belongs to the files under test
            index = is_in(m.filename, file_names)
            if index != -1:
                changed_methods = []
                for method in m.changed_methods:
                    changed_methods.append(method.name)
                num_of_modify_method += len(list(set(changed_methods)))
                # Get the names of all the modified methods
                all_methods = []
                for method in m.methods:
                    all_methods.append(method.name)
                all_methods_before = []
                for method in m.methods_before:
                    all_methods_before.append(method.name)
                all_methods, all_methods_before = method_filter(
                    file_names, all_methods, all_methods_before)
                # Check whether the statement before the first brace of each multi-line change block appears in these method names
                add, sub = judge_add_or_delete_method(all_methods,
                                                      all_methods_before)
                num_of_add_method += add
                num_of_sub_method += sub

    return [
        num_of_add_method, num_of_sub_method,
        num_of_modify_method - num_of_add_method - num_of_sub_method
    ]
Example #28
    def process(self, force=False) -> Tuple[int, int]:
        filters = {}
        if self.author_name:
            filters['only_authors'] = [
                self.author_name,
            ]

        commits = Repository(self.repo_url, **filters).traverse_commits()

        self.get_entries().delete()

        entries_to_create = []
        for commit in commits:
            entries_to_create.append(
                Entry(title=commit.msg,
                      description=commit.hash,
                      date_on_timeline=commit.committer_date.astimezone(
                          pytz.UTC),
                      schema='commit',
                      source=self.entry_source,
                      extra_attributes={
                          'hash': commit.hash,
                          'url': self.get_commit_url(commit),
                          'author': {
                              'email': commit.author.email,
                              'name': commit.author.name,
                          },
                          'changes': {
                              'files': commit.files,
                              'insertions': commit.insertions,
                              'deletions': commit.deletions,
                          },
                          'repo': {
                              'name': self.get_repo_name()
                              or commit.project_name,
                              'url': self.get_repo_url(),
                          },
                      }))
        Entry.objects.bulk_create(entries_to_create)
        return len(entries_to_create), 0
Example #29
def filter_commits(repo, commits):
    rels = []
    for commit_sha in commits:
        # has java not test
        if not list(
                filter(lambda x: x.is_java and not x.is_test,
                       commits[commit_sha])):
            continue
        c = next(
            Repository(repo.working_dir, single=commit_sha).traverse_commits())
        committed = list(
            map(
                lambda f: CommittedFile(commit_sha, f.new_path, f.added_lines,
                                        f.deleted_lines, f),
                filter(lambda f: f.language_supported and f.new_path,
                       c.modified_files)))
        if not list(
                filter(lambda x: x.is_java and not x.is_test and x.is_relevant,
                       committed)):
            continue
        rels.append(commit_sha)
    return rels
Example #30
from pydriller import Repository

for commit in Repository(
        'C:\\Users\\Leandro César\\eclipse-workspace\\Analise_Git\\Junit4'
).traverse_commits():
    print('The commit {} has been modified by {}, '
          'committed by {} on {}'.format(commit.hash, commit.author.name,
                                              commit.committer.name,
                                              commit.committer_date))