from pydriller import GitRepository

# Path to the local git clone to inspect.
repo = '/Users/luca/TUProjects/Salerno/jpacman-framework'


def _print_commit_stats(commit):
    """Print per-file additions/deletions plus complexity/LOC for *commit*."""
    print('Commit: {} changed {} files.'.format(
        commit.hash, len(commit.modifications)))
    for mod in commit.modifications:
        print('File: {} has {} additions and {} deletions'.format(
            mod.filename, mod.added, mod.removed))
        print('Complexity: {}. LOC {}'.format(mod.complexity, mod.nloc))


gr = GitRepository(repo)
# Report the current HEAD commit, then a specific historical commit.
# (Previously this loop was duplicated verbatim for both commits.)
_print_commit_stats(gr.get_head())
_print_commit_stats(gr.get_commit('51f041d'))
def mine_methods(self, start_commit: str, stop_commit: str,
                 filter_methods: Set[str] = None,
                 filter_files: Set[str] = None) -> int:
    """Mine per-method change metrics over a commit range and save them to CSV.

    Traverses commits from ``start_commit`` back to ``stop_commit`` (newest
    first), computing per-method metrics for each modified file and writing
    them through a ``Saver``.

    :param start_commit: hash to start from; ``None`` means the current HEAD.
    :param stop_commit: hash to stop at (inclusive).
    :param filter_methods: if given, only keys in this set are analyzed.
    :param filter_files: if given, only paths in this set are analyzed;
        old paths of renamed files are added to it so history is followed.
    :return: the number of commits analyzed.
    """
    methods = {}  # Dict[str, List[MetricsBean]]: unique method key -> beans
    commits_to_analyze = -1
    print('Mining: ' + self.repo_path)
    gr = GitRepository(self.repo_path)

    # Redefine start and stop commits.
    print('Adjust start and stop commits.')
    first_commit = start_commit
    if start_commit is None:
        first_commit = gr.get_head().hash
    last_commit = stop_commit

    # Print start and stop commits info.
    c1 = gr.get_commit(first_commit)
    print('Start: {} Author date: {} Committer date: {}'.format(
        c1.hash, c1.author_date, c1.committer_date))
    c2 = gr.get_commit(last_commit)
    print('Stop: {} Author date: {} Committer date: {}'.format(
        c2.hash, c2.author_date, c2.committer_date))

    # Unnecessary in production: pre-count the commits to analyze so
    # progress can be reported as "i/total" below.
    print('Retrieve commits to analyze.')
    commits = []
    for commit in RepositoryMining(self.repo_path, from_commit=first_commit,
                                   to_commit=last_commit,
                                   reversed_order=True).traverse_commits():
        commits.append(commit)
        print('{}) {} {}'.format(len(commits), commit.hash,
                                 commit.author_date))
    commits_to_analyze = len(commits)

    # Open CSV file and write header.
    saver = Saver(self.csv_file)
    saver.create_csv_file()
    saver.print_csv_header()

    # Traverse commits (newest first) and calculate metrics.
    commit_count = 0
    for commit in RepositoryMining(self.repo_path, from_commit=first_commit,
                                   to_commit=last_commit,
                                   reversed_order=True).traverse_commits():
        buggy = commit.hash in self.bic_commits
        fix = commit.hash in self.fix_commits
        mod_analyzed_count = 0
        count_files_per_commit = len(commit.modifications)
        for mod in commit.modifications:
            # Filter out unnecessary files.
            if filter_files is None or mod.new_path in filter_files:
                if mod.filename.endswith(tuple(self.allowed_extensions)):
                    mod_analyzed_count += 1

                    # Update key entry on rename; track the old path so the
                    # file keeps passing the filter further back in history.
                    if mod.change_type is ModificationType.RENAME:
                        methods = self.update_keys(methods, mod.new_path,
                                                   mod.old_path)
                        if filter_files is not None:
                            filter_files.add(mod.old_path)

                    count_methods_per_file = len(mod.methods)
                    for method in mod.methods:
                        key = self.get_unique_key(mod.new_path, mod.old_path,
                                                  method.name)
                        # For unwanted keys prevent metric calculation.
                        if filter_methods is None or key in filter_methods:
                            lines = mod.diff_parsed
                            method_metrics = MethodMetrics(
                                mod.source_code, method.start_line,
                                method.end_line, lines, buggy, fix)
                            m_touched = method_metrics.is_touched()
                            m_fix = method_metrics.is_fix()
                            m_buggy = method_metrics.is_buggy()
                            mb = MetricsBean(
                                commit.hash, commit.author_date, mod.new_path,
                                method.name, method.start_line,
                                mod.change_type.name, count_files_per_commit,
                                mod.added, mod.removed, mod.nloc,
                                mod.complexity, mod.token_count,
                                count_methods_per_file,
                                method_metrics.get_added_lines(),
                                method_metrics.get_removed_lines(),
                                method.nloc, method.complexity,
                                method.token_count, buggy, fix,
                                method_metrics.get_number_of_lines(),
                                method.fan_in, method.fan_out,
                                method.general_fan_out,
                                len(method.parameters), commit.author.email,
                                m_touched, m_fix, m_buggy)

                            # Append the new bean.
                            # FIX: was `methods.get(key, []).append(mb)`,
                            # which appends to a throwaway list when the key
                            # is missing; index directly after ensuring it.
                            if key not in methods:
                                methods[key] = []
                            methods[key].append(mb)

                            # Going back in the past, ADD is the moment in
                            # which a file (and so a method) is added;
                            # therefore it can be flushed to the CSV and
                            # dropped from memory to save RAM.
                            if mod.change_type is ModificationType.ADD:
                                self.flush_methods(methods, key, saver)

        commit_count += 1
        print(
            'Methods: {:>8} | Commit {:>6}/{:<6} {} Date: {} Mods: {:>4}/{:<4} | Bug: {} Fix: {}'
            .format(len(methods), commit_count, commits_to_analyze,
                    commit.hash, commit.author_date.strftime('%d/%m/%Y'),
                    len(commit.modifications), mod_analyzed_count, buggy,
                    fix))

    # Flush whatever survived the traversal to the CSV and close it.
    for key, value in methods.items():
        saver.add_method_to_csv(key, value)
    saver.close_csv_file()
    print('Mining ended')
    return commit_count
def simular_conflitos_do(configuracaoferramenta_escolhida, nome_branch_origem,
                         nome_branch_forkeado, apagar_branch_merge,
                         gerar_estatisticas=False):
    """Simulate a merge between two branches and collect conflict data.

    Creates a throwaway merge branch, attempts the merge, and records every
    conflicting file with its conflicting line ranges (and, optionally,
    per-author statistics). Depending on ``apagar_branch_merge`` the merge is
    either aborted and the branch deleted, or committed with the conflict
    markers left in place.

    :return: tuple ``(execution_logs, conflict_ranges_by_file,
        author_stats_by_file)``.
    """
    execution_logs = []
    conflicting_files = []
    conflict_ranges_by_file = dict()
    author_stats_by_file = dict()

    # Load the tool configuration selected by primary key.
    tool_config = ConfiguracaoFerramenta.objects.get(
        pk=configuracaoferramenta_escolhida)
    repo = GitRepository(tool_config.path_auxiliary_files)

    # Refresh both branches through an external shell script; keep its output.
    update_result = subprocess.run(
        ["./atualizar_branches.sh", nome_branch_origem, nome_branch_forkeado],
        stdout=subprocess.PIPE)
    execution_logs.extend(update_result.stdout.decode('utf-8').split('\n'))

    # Capture the latest short hash of the forked branch.
    msg = 'Capturando último hash da branch ' + nome_branch_forkeado
    execution_logs.append(msg)
    print(msg)
    repo.git().checkout(nome_branch_forkeado)
    forked_head_hash = repo.get_head().hash[0:7]

    # Capture the latest short hash of the origin branch.
    msg = 'Capturando último hash da branch ' + nome_branch_origem
    execution_logs.append(msg)
    print(msg)
    repo.git().checkout(nome_branch_origem)
    origin_head_hash = repo.get_head().hash[0:7]

    repo.git().checkout(nome_branch_forkeado)

    # Compose a unique merge-branch name from both hashes plus a timestamp.
    str_time = str(datetime.timestamp(datetime.now())).split('.')[0]
    nome_branch_merge = ('merge_origem_' + origin_head_hash +
                         '_forkeado_' + forked_head_hash + '_t' + str_time)

    msg = 'Criando branch de merge: ' + nome_branch_merge
    execution_logs.append(msg)
    print(msg)
    repo.git().branch(nome_branch_merge)
    repo.git().checkout(nome_branch_merge)

    execution_logs.append('Fazendo o merge')
    print('Fazendo o merge')

    houve_conflitos = False
    try:
        # A clean merge raises nothing; a conflict surfaces as an exception
        # whose message lists the conflicting files.
        repo.git().merge(nome_branch_origem)
    except Exception as e:
        houve_conflitos = True
        error_lines = str(e).split('\n')
        conflicting_files = identificar_arquivos_em_conflito(error_lines)
        for conflicted in conflicting_files:
            full_path = repo.path.as_posix() + '/' + conflicted
            if is_binary(full_path):
                # Binary files cannot be scanned line by line: mark whole file.
                ranges = [('todo o arquivo', 'arquivo binário')]
            else:
                ranges = identificar_intervalos_trechos_conflitantes(
                    '<<<<<<< HEAD', '>>>>>>> ' + nome_branch_origem,
                    full_path)
            conflict_ranges_by_file[full_path] = ranges
            if gerar_estatisticas:
                author_stats_by_file[full_path] = \
                    identificar_estatisticas_de_autores(repo, full_path)

    if houve_conflitos and apagar_branch_merge:
        repo.git().merge('--abort')
        repo.git().checkout('master')
        execution_logs.append('Desfazendo o merge')
        print('Desfazendo o merge')
        # Delete the merge branch via an external shell script; keep output.
        delete_result = subprocess.run(
            ["./apagar_branch.sh", nome_branch_merge],
            stdout=subprocess.PIPE)
        execution_logs.extend(delete_result.stdout.decode('utf-8').split('\n'))
    elif houve_conflitos and not apagar_branch_merge:
        print('Fazendo commit sem resolver conflitos')
        repo.git().add('.')
        repo.git().commit('-m "Commit SEM resolver conflitos"')
        repo.git().checkout('master')

    return (execution_logs, conflict_ranges_by_file, author_stats_by_file)