from pydriller import GitRepository

# Path to a local clone of the repository under analysis
repo = '/Users/luca/TUProjects/Salerno/jpacman-framework'

gr = GitRepository(repo)

# Inspect the commit currently at HEAD
last_commit = gr.get_head()
print('Commit: {} changed {} files.'.format(last_commit.hash,
                                            len(last_commit.modifications)))
for mod in last_commit.modifications:
    print('File: {} has {} additions and {} deletions'.format(
        mod.filename, mod.added, mod.removed))
    print('Complexity: {}. LOC {}'.format(mod.complexity, mod.nloc))

# Inspect a specific commit by its (short) hash
last_commit = gr.get_commit('51f041d')
print('Commit: {} changed {} files.'.format(last_commit.hash,
                                            len(last_commit.modifications)))
for mod in last_commit.modifications:
    print('File: {} has {} additions and {} deletions'.format(
        mod.filename, mod.added, mod.removed))
    print('Complexity: {}. LOC {}'.format(mod.complexity, mod.nloc))
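The snippet above prints per-file churn for a single commit. To scan a whole
range of commits with the same pydriller 1.x API, RepositoryMining can be
combined with it; a minimal sketch, assuming the same local clone:

from pydriller import RepositoryMining

# Iterate every commit in the repository, oldest first
for commit in RepositoryMining(repo).traverse_commits():
    print('{} by {} touched {} files'.format(
        commit.hash[:7], commit.author.name, len(commit.modifications)))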
Example 2

    # Requires (in the enclosing module): from typing import Set
    # and: from pydriller import GitRepository, RepositoryMining
    # and: from pydriller.domain.commit import ModificationType
    # Saver, MethodMetrics and MetricsBean are project-local helpers
    def mine_methods(self,
                     start_commit: str,
                     stop_commit: str,
                     filter_methods: Set[str] = None,
                     filter_files: Set[str] = None) -> int:
        methods = {}  # Dict[str, List[MetricsBean]]: method key -> history of beans
        commits_to_analyze = -1
        print('Mining: ' + self.repo_path)
        gr = GitRepository(self.repo_path)

        # Redefine start and stop commits
        print('Adjust start and stop commits.')
        first_commit = start_commit
        if start_commit is None:
            first_commit = gr.get_head().hash
        last_commit = stop_commit

        # Print start and stop commits info
        c1 = gr.get_commit(first_commit)
        print('Start: {} Author date: {} Committer date: {}'.format(
            c1.hash, c1.author_date, c1.committer_date))
        c2 = gr.get_commit(last_commit)
        print('Stop:  {} Author date: {} Committer date: {}'.format(
            c2.hash, c2.author_date, c2.committer_date))

        # Not needed in production: counting the commits up front only
        # enables the progress report printed below
        print('Retrieve commits to analyze.')
        commits = []
        for commit in RepositoryMining(self.repo_path,
                                       from_commit=first_commit,
                                       to_commit=last_commit,
                                       reversed_order=True).traverse_commits():
            commits.append(commit)
            print('{}) {} {}'.format(len(commits), commit.hash,
                                     commit.author_date))
        commits_to_analyze = len(commits)

        # Open CSV file and write header
        saver = Saver(self.csv_file)
        saver.create_csv_file()
        saver.print_csv_header()

        # Traverse commits and calculate metrics
        commit_count = 0
        # Alternative: only traverse commits touching allowed file types:
        # for commit in RepositoryMining(self.repo_path, from_commit=first_commit,
        #                                to_commit=last_commit, reversed_order=True,
        #                                only_modifications_with_file_types=self.allowed_extensions
        #                                ).traverse_commits():
        for commit in RepositoryMining(self.repo_path,
                                       from_commit=first_commit,
                                       to_commit=last_commit,
                                       reversed_order=True).traverse_commits():
            buggy = commit.hash in self.bic_commits
            fix = commit.hash in self.fix_commits
            mod_analyzed_count = 0
            count_files_per_commit = len(commit.modifications)
            for mod in commit.modifications:
                # Filter out unnecessary files
                if filter_files is None or mod.new_path in filter_files:
                    if mod.filename.endswith(tuple(self.allowed_extensions)):
                        mod_analyzed_count += 1
                        # Update key entry on rename
                        if mod.change_type is ModificationType.RENAME:
                            methods = self.update_keys(methods, mod.new_path,
                                                       mod.old_path)
                            if filter_files is not None:
                                filter_files.add(mod.old_path)
                        count_methods_per_file = len(mod.methods)
                        for method in mod.methods:
                            key = self.get_unique_key(mod.new_path,
                                                      mod.old_path,
                                                      method.name)
                            # For unwanted keys prevent metric calculation
                            if filter_methods is None or key in filter_methods:
                                lines = mod.diff_parsed
                                method_metrics = MethodMetrics(
                                    mod.source_code, method.start_line,
                                    method.end_line, lines, buggy, fix)
                                m_touched = method_metrics.is_touched()
                                m_fix = method_metrics.is_fix()
                                m_buggy = method_metrics.is_buggy()
                                mb = MetricsBean(
                                    commit.hash, commit.author_date,
                                    mod.new_path, method.name,
                                    method.start_line, mod.change_type.name,
                                    count_files_per_commit, mod.added,
                                    mod.removed, mod.nloc, mod.complexity,
                                    mod.token_count, count_methods_per_file,
                                    method_metrics.get_added_lines(),
                                    method_metrics.get_removed_lines(),
                                    method.nloc, method.complexity,
                                    method.token_count, buggy, fix,
                                    method_metrics.get_number_of_lines(),
                                    method.fan_in, method.fan_out,
                                    method.general_fan_out,
                                    len(method.parameters),
                                    commit.author.email, m_touched, m_fix,
                                    m_buggy)
                                # Append new bean
                                methods.setdefault(key, []).append(mb)
                                # Traversing history backwards, ADD is the commit
                                # where a file (and hence each of its methods) first
                                # appears, so the method's history is complete and
                                # can be flushed to the CSV to save RAM
                                if mod.change_type is ModificationType.ADD:
                                    self.flush_methods(methods, key, saver)
            commit_count += 1
            print(
                'Methods: {:>8} | Commit {:>6}/{:<6} {} Date: {} Mods: {:>4}/{:<4} | Bug: {} Fix: {}'
                .format(len(methods), commit_count, commits_to_analyze,
                        commit.hash, commit.author_date.strftime('%d/%m/%Y'),
                        len(commit.modifications), mod_analyzed_count, buggy,
                        fix))
        for key, value in methods.items():
            saver.add_method_to_csv(key, value)
        saver.close_csv_file()
        print('Mining ended')
        return commit_count
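mine_methods relies on several project-local helpers (Saver, MethodMetrics,
MetricsBean, get_unique_key, flush_methods) that the excerpt does not show. A
minimal sketch of how it might be invoked; the MethodMiner class name and its
constructor arguments are assumptions, not part of the excerpt:

# Hypothetical wiring around the mine_methods excerpt above
miner = MethodMiner(repo_path='/path/to/repo',
                    csv_file='methods.csv',
                    allowed_extensions={'.java'})
analyzed = miner.mine_methods(start_commit=None,  # None defaults to HEAD
                              stop_commit='51f041d')
print('{} commits analyzed'.format(analyzed))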
Example 3
# Requires (in the enclosing module): import subprocess
# and: from datetime import datetime
# and: from pydriller import GitRepository
# is_binary, ConfiguracaoFerramenta and the identificar_* helpers are
# project-local (is_binary presumably from binaryornot.check)
def simular_conflitos_do(configuracaoferramenta_escolhida, nome_branch_origem,
                         nome_branch_forkeado, apagar_branch_merge,
                         gerar_estatisticas=False):
    logs_de_execucao = []
    arquivos_com_conflito = []
    arquivos_e_trechos = dict()
    arquivos_e_estatisticas = dict()

    # Fetch the tool configuration for the given id
    config = ConfiguracaoFerramenta.objects.get(
        pk=configuracaoferramenta_escolhida)
    gr = GitRepository(config.path_auxiliary_files)

    # Update both branches via an external shell script
    shell_result = subprocess.run(
        ["./atualizar_branches.sh", nome_branch_origem, nome_branch_forkeado],
        stdout=subprocess.PIPE)
    shell_result_as_string = shell_result.stdout.decode('utf-8')
    for r in shell_result_as_string.split('\n'):
        logs_de_execucao.append(r)

    logs_de_execucao.append('Capturing latest hash of branch ' +
                            nome_branch_forkeado)
    print('Capturing latest hash of branch {}'.format(nome_branch_forkeado))
    gr.git().checkout(nome_branch_forkeado)
    hash_ultimo_commit_forkeado = gr.get_head().hash[0:7]
    # print(hash_ultimo_commit_forkeado)
    logs_de_execucao.append('Capturing latest hash of branch ' +
                            nome_branch_origem)
    print('Capturing latest hash of branch {}'.format(nome_branch_origem))
    gr.git().checkout(nome_branch_origem)
    hash_ultimo_commit_origem = gr.get_head().hash[0:7]
    # print(hash_ultimo_commit_origem)
    gr.git().checkout(nome_branch_forkeado)
    # Unix-timestamp suffix keeps the merge branch name unique
    str_time = str(int(datetime.now().timestamp()))
    nome_branch_merge = 'merge_origem_' + str(hash_ultimo_commit_origem)
    nome_branch_merge += '_forkeado_' + str(hash_ultimo_commit_forkeado)
    nome_branch_merge += '_t' + str_time
    logs_de_execucao.append('Creating merge branch: ' + nome_branch_merge)
    print('Creating merge branch: {}'.format(nome_branch_merge))
    gr.git().branch(nome_branch_merge)
    gr.git().checkout(nome_branch_merge)

    logs_de_execucao.append('Performing the merge')
    print('Performing the merge')
    houve_conflitos = False
    try:
        # Attempt the merge; if it completes without errors there was no conflict
        gr.git().merge(nome_branch_origem)

    except Exception as e:
        houve_conflitos = True
        linhas_com_erro = str(e).split('\n')
        arquivos_com_conflito = identificar_arquivos_em_conflito(
            linhas_com_erro)

        for a in arquivos_com_conflito:
            linhas_conflitantes = []
            caminho_completo = gr.path.as_posix() + '/' + a
            if not is_binary(caminho_completo):
                # Locate the line intervals of each conflicting hunk
                linhas_conflitantes = identificar_intervalos_trechos_conflitantes(
                    '<<<<<<< HEAD', '>>>>>>> ' + nome_branch_origem,
                    caminho_completo)
            else:
                # A binary file conflicts as a whole
                linhas_conflitantes.append(
                    ('whole file', 'binary file'))

            arquivos_e_trechos[caminho_completo] = linhas_conflitantes
            if gerar_estatisticas:
                arquivos_e_estatisticas[
                    caminho_completo] = identificar_estatisticas_de_autores(
                        gr, caminho_completo)

    if houve_conflitos and apagar_branch_merge:
        gr.git().merge('--abort')
        gr.git().checkout('master')
        logs_de_execucao.append('Undoing the merge')
        print('Undoing the merge')
        # Delete the merge branch via an external shell script
        shell_result = subprocess.run(
            ["./apagar_branch.sh", nome_branch_merge], stdout=subprocess.PIPE)
        shell_result_as_string = shell_result.stdout.decode('utf-8')
        for r in shell_result_as_string.split('\n'):
            logs_de_execucao.append(r)

    elif houve_conflitos and not apagar_branch_merge:
        print('Committing without resolving conflicts')
        gr.git().add('.')
        gr.git().commit('-m "Commit WITHOUT resolving conflicts"')
        gr.git().checkout('master')

    return (logs_de_execucao, arquivos_e_trechos, arquivos_e_estatisticas)
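identificar_intervalos_trechos_conflitantes is called above but not defined in
this example. A minimal sketch of what such a helper might do, returning
1-based (start, end) line intervals for each conflict hunk; the name and
argument order come from the call site, the body is an assumption:

def identificar_intervalos_trechos_conflitantes(marcador_inicio, marcador_fim,
                                                caminho_arquivo):
    # Scan the conflicted file for start/end marker pairs
    intervalos = []
    inicio = None
    with open(caminho_arquivo, encoding='utf-8', errors='replace') as f:
        for numero, linha in enumerate(f, start=1):
            if linha.startswith(marcador_inicio):
                inicio = numero
            elif linha.startswith(marcador_fim) and inicio is not None:
                intervalos.append((inicio, numero))
                inicio = None
    return intervalos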