def checkout_refactored_commit(self, repo_cfg: dict, call_back: callable):
    """Check out every commit referenced in the repository's refactoring
    report and invoke *call_back* once per commit, resuming past commits
    that already have a smell-report CSV on disk.

    Parameters
    ----------
    repo_cfg : dict
        Repository configuration; must contain 'name'. The current commit
        hash is written into ``repo_cfg['commit']`` before each callback.
    call_back : callable
        Called with ``repo_cfg`` after each successful checkout.
    """
    gr = GitRepository(path=self.cfg['paths']['repo'] + repo_cfg['name'])
    # Best-effort reset so checkouts below start from a clean tree; a
    # failed reset (e.g. tree already clean) is deliberately non-fatal,
    # but we no longer swallow KeyboardInterrupt/SystemExit.
    try:
        gr.reset()
    except Exception:
        pass
    df = pd.read_csv(
        self.cfg['paths']['commit_report'] + repo_cfg['name'] + "_refactored.csv",
        header=0)
    commits = df['commit'].tolist()
    previous = df['previous'].tolist()
    finished_commit = []
    # Resume support: commits that already produced an output CSV are skipped.
    for file in os.listdir(self.cfg['paths']['smell_report'] + repo_cfg['name']):
        if re.match(r'\d+\_\w+\_\w+\.csv', file):
            # maxsplit=2 keeps the trailing commit part intact even when
            # the middle fields contain extra underscores (\w matches '_').
            file_id, cs_type, commit = file.split('_', 2)
            finished_commit.append(commit.replace('.csv', ''))
    # NaN != NaN, so subtracting {np.nan} from a set is unreliable;
    # filter missing values explicitly instead.
    unique_commit = {c for c in commits + previous if pd.notna(c)}
    unique_commit -= set(finished_commit)
    cpt = 0
    cpt_total = len(unique_commit)
    for commit in unique_commit:
        cpt += 1
        print(f"{commit}\t\t\t{repo_cfg['name']}\t{cpt}/{cpt_total}")
        gr.checkout(commit)
        print('\t checkout -done!')
        repo_cfg['commit'] = commit
        call_back(repo_cfg)
def main():
    """Clone every repository listed in the source database, check out the
    recorded hash, and write project/model commit information into the
    destination database. Already-processed repository ids are skipped.
    """
    start = time.time()
    source_database = ""
    dst_database = ""
    path = "workdir"
    dest_project_database_controller = Project_commits_info_Controller(
        dst_database)
    dest_model_database_controller = Model_commits_info_Controller(
        dst_database)
    project_verbatim = Project_commits_verbatim_Controller(dst_database)
    model_verbatim = Model_commits_verbatim_Controller(dst_database)
    # Repositories known to break processing; skipped outright.
    # (The original condition listed kinect2-SDK-for-Simulink twice.)
    skip_urls = {
        "https://github.com/alesgraz/kinect2-SDK-for-Simulink",
        "https://github.com/OpenCadd/Lego_nxt_car",
        "https://github.com/StefanMack/ProjSensSys",
        "https://github.com/chiloanel/UWMatlab",
    }
    # create a database connection
    conn = create_connection(source_database)
    dst_conn = create_connection(dst_database)
    with dst_conn:
        processed_id, processed_mdl_name = get_id_name(dst_conn)
    with conn:
        id_urls = get_repo_id_urls(conn)
        for id_url in id_urls:
            repo_id, url, model_files, repo_hash = id_url
            if not os.path.exists(path):
                os.mkdir(path)
            if url in skip_urls:
                continue
            try:
                if repo_id not in processed_id:
                    # NOTE(review): shell command built from a DB-supplied
                    # URL; trusted input assumed — confirm upstream.
                    clone = "git clone " + url + " " + path
                    os.system(clone)  # Cloning
                    gr = GitRepository(path)
                    gr.checkout(repo_hash)
                    url = path
                    project_lifetime = write_project_commit_info(
                        url, repo_id, repo_hash,
                        dest_project_database_controller, project_verbatim)
                    write_model_commit_info(model_files, url, repo_id,
                                            repo_hash,
                                            dest_model_database_controller,
                                            project_lifetime, model_verbatim)
                else:
                    logging.info("Skipping . ALready Processed {}".format(repo_id))
            except Exception as e:
                logging.error(e)
                continue
            finally:
                # Guard: rmtree on a missing path would raise inside
                # finally and abort the whole loop.
                if os.path.exists(path):
                    shutil.rmtree(path)
    end = time.time()
    logging.info("IT took {} seconds".format(end - start))
def checkout_all_commit(self, repo_cfg: dict):
    """Walk every commit of the repository in chronological order and run
    all configured detectors against each checked-out state."""
    repo_path = self.cfg['paths']['repo'] + repo_cfg['name']
    report_path = self.cfg['paths']['commit_report'] + repo_cfg['commit_file']
    gr = GitRepository(path=repo_path)
    df = pd.read_csv(report_path, header=0)
    # Oldest commit first, so detectors see history in author-date order.
    df.sort_values(by=['author_date'], ascending=True, inplace=True)
    for commit_hash in df['hash'].tolist():
        gr.checkout(commit_hash)
        repo_cfg['commit'] = commit_hash
        for detector in self.detectors:
            detector.run_on(repo_cfg)
def main():
    """Index bug-inducing commits via SZZ, then, for each major release,
    copy its source files into per-release bug / not_bug folders under
    ``data3/``.

    Usage: argv[1] is the path to a local git repository clone.
    """
    repo_path = sys.argv[1]
    repo_branch = 'master'
    commits = RepositoryMining(repo_path,
                               only_in_branch=repo_branch).traverse_commits()
    # Materialize the generator so commits can be indexed and sliced below.
    commits = [commit for commit in commits]
    gitRepo = GitRepository(repo_path)
    # Only releases at least 3 years from either end of the history are
    # processed (presumably to avoid sparse bug data at the edges —
    # TODO confirm rationale).
    start_date = commits[0].committer_date + relativedelta(years=3)
    last_date = commits[-1].committer_date - relativedelta(years=3)
    # commit hash -> list of filenames considered buggy at that commit
    bug_tracker = defaultdict(list)
    bug_tracker_pickle = "data3/{}.pickle".format(
        os.path.basename(os.path.normpath(repo_path)))
    # First index the buggy files (cached in a pickle between runs)
    if os.path.exists(bug_tracker_pickle):
        with open(bug_tracker_pickle, 'rb') as handle:
            bug_tracker = pickle.load(handle)
    else:
        for commit_index, commit in enumerate(commits):
            if not is_bugfix_commit(commit.msg):
                continue
            try:
                for m in commit.modifications:
                    if not valid_source_file(m.filename):
                        continue
                    bug_commit = gitRepo.get_commits_last_modified_lines(
                        commit, m)  ### uses SZZ
                    # if bug_commit == {}: continue
                    # Find the earliest commit that last touched any of the
                    # fixed lines; sentinel is larger than any real index.
                    bug_start_index = 99999999999999999999
                    for _file in bug_commit:
                        for i, _commit in enumerate(commits[:commit_index]):
                            if _commit.hash in bug_commit[_file] \
                                    and i<bug_start_index:
                                bug_start_index = i
                    # Every commit between introduction and fix is treated
                    # as carrying the bug in this file.
                    for _commit in commits[bug_start_index:commit_index]:
                        bug_tracker[_commit.hash].append(m.filename)
            except Exception as e:
                # Best-effort indexing: log and move on to the next commit.
                print("[***]", e)
                print(traceback.format_exc())
                print("Continuing for next commits")
        print(len(bug_tracker.keys()))
        with open(bug_tracker_pickle, 'wb') as handle:
            pickle.dump(bug_tracker, handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
    # Copy the files: the release list is a CSV of "tag,hash" lines.
    with open('maj_versions/{}.hash'.format(
            os.path.basename(os.path.normpath(repo_path)))) as f:
        major_releases = []
        for line in f.read().splitlines():
            tag, hash = line.split(',')
            major_releases.append((tag, hash))
    for version, commit in enumerate(commits):
        if not commit.hash in [item[1] for item in major_releases]:
            continue
        if commit.committer_date < start_date or commit.committer_date > last_date:
            continue
        # Recover the tag belonging to this release commit.
        for tag, hash in major_releases:
            if hash == commit.hash:
                break
        print("[*] Doing {}".format(tag))
        gitRepo.checkout(commit.hash)
        base_dir_not_bug = "data3/{}/{}/not_bug".format(
            os.path.basename(os.path.normpath(repo_path)), tag)
        base_dir_bug = "data3/{}/{}/bug".format(
            os.path.basename(os.path.normpath(repo_path)), tag)
        if not os.path.exists(base_dir_bug):
            os.makedirs(base_dir_bug)
        if not os.path.exists(base_dir_not_bug):
            os.makedirs(base_dir_not_bug)
        all_files = gitRepo.files()
        for _file in all_files:
            if not valid_source_file(_file):
                continue
            filename = os.path.basename(os.path.normpath(_file))
            # A file is "buggy" at this release if SZZ flagged it for
            # this commit; otherwise it goes to the not_bug folder.
            if commit.hash in bug_tracker and filename in bug_tracker[
                    commit.hash]:
                file_path_to_write = os.path.join(base_dir_bug, filename)
            else:
                file_path_to_write = os.path.join(base_dir_not_bug, filename)
            shutil.copyfile(_file, file_path_to_write)
    print("All Done!")
class RepositoryProcessor:
    """Extracts before/after file pairs for modified Java files from a git
    repository's history and writes an index of the pairs to
    ``filepairs/<repo>/pairs.txt``."""

    def __init__(self, repository: str, owner: str):
        self.owner = owner
        self.repository = os.path.split(repository)[-1]
        self.repo = GitRepository(repository)
        self.mining = RepositoryMining(repository)
        # (before_path, after_path, pair_id) triples collected so far.
        self.pairs = []
        # Deterministic sampling for run_random.
        random.seed(42)

    def _write_pairs(self):
        # Shared by run() and run_random(): one "before after id" line per
        # collected pair (previously duplicated in both methods).
        with open(os.path.join('filepairs', self.repository, 'pairs.txt'),
                  'w') as f:
            f.write('\n'.join(
                map(lambda x: f'{x[0]} {x[1]} {x[2]}', self.pairs)))
            f.write('\n')

    def run(self):
        """Collect pairs from every non-merge commit, then write the index."""
        self.get_all_filepairs()
        self._write_pairs()

    def get_all_filepairs(self, file_filter=java_file_filter):
        """Walk all non-merge commits and save a pair for every MODIFY
        modification whose filename passes *file_filter*."""
        commits = list(
            filter(lambda x: not x.merge, self.mining.traverse_commits()))
        for commit in commits:
            for modification in commit.modifications:
                if modification.change_type == ModificationType.MODIFY:
                    if file_filter(modification.filename):
                        self.get_file_pair(commit, modification)

    def get_file_pair(self, commit, modification: Modification):
        """Check out the parent and the commit in turn, copying the file's
        before/after versions into ``filepairs/<repo>/<hash>/<file>``.

        NOTE(review): assumes a non-root commit — ``commit.parents[0]``
        would raise IndexError on an initial commit.
        """
        parent = commit.parents[0]
        repo = self.repo.project_name
        commit_hash = commit.hash
        filename = modification.filename
        path = os.path.join('filepairs', repo, commit_hash, filename)
        os.makedirs(path, exist_ok=True)
        self.repo.checkout(parent)
        before = os.path.join(self.repository, modification.old_path)
        before_saved = os.path.join(path,
                                    'before_' + commit_hash + '_' + filename)
        copyfile(before, before_saved)
        self.repo.checkout(commit_hash)
        after = os.path.join(self.repository, modification.new_path)
        after_saved = os.path.join(path,
                                   'after__' + commit_hash + '_' + filename)
        copyfile(after, after_saved)
        self.pairs.append(
            (before_saved, after_saved, commit_hash + '.' + self.owner + '.'
             + before.replace('/', '.')))

    def run_random(self, number):
        """Collect pairs from *number* randomly chosen commits, then write
        the index."""
        self.get_random_filepairs(number)
        self._write_pairs()

    def get_random_filepairs(self, number, file_filter=java_file_filter):
        """Like get_all_filepairs, but over *number* commits sampled with
        replacement (random.choices) from the non-merge commits."""
        commits = random.choices(list(
            filter(lambda x: not x.merge, self.mining.traverse_commits())),
                                 k=number)
        for idx, commit in enumerate(commits):
            print(f'Processing commit №{idx}: {commit.hash}')
            for modification in commit.modifications:
                if modification.change_type == ModificationType.MODIFY:
                    if file_filter(modification.filename):
                        self.get_file_pair(commit, modification)
def simular_conflitos(request):
    """Django view: simulate merging vendor commits into client commits on
    an auxiliary repository and report the resulting conflicts.

    GET renders the configuration-selection form; POST runs the merge
    simulation for the chosen tool configuration.
    """
    # Fetch the tool-execution configuration options registered in the DB.
    configuracaoferramenta_choices = ConfiguracaoFerramenta.objects.all(
    ).order_by('-id')
    configuracaoferramenta_choices_to_choicefield = list()
    for configuracao in configuracaoferramenta_choices:
        configuracaoferramenta_choices_to_choicefield.append(
            [configuracao.pk, configuracao])
    # On GET, build the blank form.
    if request.method == 'GET':
        form = ExecutarFerramentaForm(
            configuracaoferramenta_choices_to_choicefield)
        title = 'Forkuptool - Módulo de análise de repositórios'
        subtitle = 'Selecione uma configuração para continuar'
        return render(request, 'simular_conflitos.html', locals())
    # On POST, process the submitted form data.
    elif request.method == 'POST':
        configuracaoferramenta_escolhida = None
        if 'configuracaoferramenta_escolhida' in request.POST:
            configuracaoferramenta_escolhida = request.POST[
                'configuracaoferramenta_escolhida']
        if configuracaoferramenta_escolhida:
            # Load the configuration for the submitted id.
            config = ConfiguracaoFerramenta.objects.get(
                pk=configuracaoferramenta_escolhida)
            gr = GitRepository(config.path_auxiliary_files)
            # TODO: change the commit selection, which today is hard-coded.
            # commits_vendor = (
            #     'fca529f', 'a639855', '168e36a', '48374cd', 'e0428a2',
            #     '49275f2', '25ad58d', 'd7c1bdf', '75c8a35', '41f8ace',
            #     '69e7a98', 'a8f4829', 'be98898', 'f7e815a', '03f178c',
            #     'fc7af49', '74a8748', 'a58320e', '1d4fa85', '855e138',
            #     '671aaa8', '06a967e', '048aa85', '9119a3f', 'a74d6ae',
            #     'bb83f28', '8f5e756', '83610c5', '5c5fc7d', '5e74902',
            #     'ce59547', '923f5ee', 'df8666d', 'c219a97', 'aefcd22',
            #     '0be9ee9', 'bb3c479', 'e0557a0', 'f02b0c3', '8d5cdfa',
            #     'e944c53',
            #     '0a93957',)
            commits_vendor = ('48d1edb', )
            # commits_client = (
            #     'a9cb50e', '085aa2a', '66165fd', '6eb4d4c', '353a877',
            #     'b1bf9fe', '7891bb7', '70ddc11', '6a858e2', 'bbc8bea',
            #     'df85d8e', 'b57eca7', '914f9b1', 'e8faf15', '3a1a355',
            #     'da45411', '53464c7', '75f241c', '1e8e20b', '7c9583c',
            #     '7482e54', 'a2c2ae2', 'dc18528', '5b308d9', 'fc62c99',
            #     '2977dbf', '90aa392', 'd2c2f36', 'e083730', '2edc4b3',
            #     '3328138', '3595f20', 'd6b40e5', '1c82a62', '757c692',
            #     '0230fb0', '3094585', '43a6ab3', '151f8a3', 'ec90a78',
            #     '5a51d1a',
            #     '287f6ed',)
            commits_client = ('d472976', )
            contador = 0
            # round number -> (per-file conflict dict, total conflicting
            # chunks, total conflicting lines)
            conflitos_por_rodada = dict()
            for c in commits_vendor:
                total_trechos_conflitantes = 0
                total_linhas_conflitantes = 0
                conflitos = dict()
                # Create vendor / client / merge branches for this round
                # from the respective pinned commits.
                gr.git().checkout('master')
                gr.git().checkout(c)
                branch_vendor = 't' + str(contador + 1) + 'vendor'
                gr.git().branch(branch_vendor)
                gr.git().checkout('master')
                gr.checkout(commits_client[contador])
                branch_client = 't' + str(contador + 1) + 'client'
                gr.git().branch(branch_client)
                branch_merge = 't' + str(contador + 1) + 'merge'
                gr.git().branch(branch_merge)
                gr.git().checkout(branch_merge)
                try:
                    # Attempt the merge; if it runs without errors there
                    # was no conflict.
                    gr.git().merge(branch_vendor)
                except Exception as e:
                    # GitPython reports conflicts via the exception text;
                    # parse the conflicting file list out of it.
                    linhas_com_erro = str(e)
                    linhas_com_erro = linhas_com_erro.split('\n')
                    arquivos_com_conflito = identificar_arquivos_em_conflito(
                        linhas_com_erro)
                    for a in arquivos_com_conflito:
                        numero_trechos_conflitantes = 0
                        numero_linhas_conflitantes = 0
                        caminho_completo = gr.path.as_posix() + '/' + a
                        if not is_binary(caminho_completo):
                            # Count conflict chunks and conflicting lines
                            # from the merge markers left in the file.
                            numero_trechos_conflitantes = contar_ocorrencias_desta_linha_neste_arquivo(
                                '<<<<<<< HEAD', caminho_completo)
                            numero_linhas_conflitantes = contar_linhas_entre_esses_linhas_neste_arquivo(
                                '<<<<<<< HEAD', '=======', caminho_completo)
                        else:
                            # Binary files: count as a single chunk/line.
                            numero_trechos_conflitantes = 1
                            numero_linhas_conflitantes = 1
                        total_trechos_conflitantes += numero_trechos_conflitantes
                        total_linhas_conflitantes += numero_linhas_conflitantes
                        conflitos[
                            caminho_completo] = numero_trechos_conflitantes
                    # Undo the failed merge before continuing.
                    gr.git().merge('--abort')
                gr.git().checkout('master')
                print(('Processou par {}: {} - {}').format(
                    (contador + 1), c, commits_client[contador]))
                conflitos_por_rodada[(contador + 1)] = (conflitos,
                                                        total_trechos_conflitantes,
                                                        total_linhas_conflitantes)
                contador += 1
            print(conflitos_por_rodada)
            title = 'Forkuptool - Módulo de análise de repositórios'
            subtitle = 'Simulação de conflitos de mesclagem'
            return render(request, 'simular_conflitos_show.html', locals())
        else:
            messages.error(request, 'Necessário informar uma configuração')
            return render(request, 'index.html', {
                'title': 'Forkuptool',
                'subtitle': 'Bem-vindo',
            })
import os

from pydriller import GitRepository

#
# Collects statistic about module file count.
# Will be replaced by SourceAnalyzer.py
#

repository = GitRepository('../magento2-git')
# NOTE(review): reaches into pydriller's private configuration object to
# set the main branch — may break across pydriller versions; confirm there
# is no public API for this.
repository._conf.set_value("main_branch", '2.4-develop')
# Check out the release tagged 2.3.5-p1 before counting.
commit235p1 = repository.get_commit_from_tag('2.3.5-p1')
repository.checkout(commit235p1.hash)
# Count files directly inside the Cron module directory (non-recursive:
# only the first os.walk entry is consumed).
path, dirs, files = next(os.walk("../magento2-git/app/code/Magento/Cron"))
file_count = len(files)
print(file_count)