def __init__(self, repository: str, owner: str):
    """Bind a repository path and its owner; prepare mining helpers.

    :param repository: filesystem path to the git repository
    :param owner: owner/organisation name of the repository
    """
    self.owner = owner
    # Keep only the last path component as the repository name.
    self.repository = os.path.split(repository)[-1]
    # Git access object and history traverser for the same repo.
    self.repo = GitRepository(repository)
    self.mining = RepositoryMining(repository)
    self.pairs = []
    # Fixed seed so any later sampling is reproducible.
    random.seed(42)
def update_has_impact_code(apps, schema_editor):
    """Recompute ``has_impact_loc`` for every Modification of project 2.

    We can't import the Modification model directly as it may be a newer
    version than this migration expects, so we use the historical version
    obtained through ``apps``.
    """
    Modification = apps.get_model('contributions', 'Modification')
    for mod in Modification.objects.filter(commit__tag__project_id=2):
        GR = GitRepository(mod.commit.tag.project.project_path)
        diff_text = GR.parse_diff(mod.diff)
        # Render each side of the diff as "\n<line-no>  <content>" blocks.
        # ''.join over a generator replaces the original repeated string
        # concatenation, which was quadratic in the number of diff lines.
        added_text = ''.join(
            f"\n{line[0]}  {line[1]}" for line in diff_text['added'])
        deleted_text = ''.join(
            f"\n{line[0]}  {line[1]}" for line in diff_text['deleted'])
        added_uncommented_lines = detect_impact_loc(added_text)
        deleted_uncommented_lines = detect_impact_loc(deleted_text)
        # A modification has impact if either side contains non-comment code.
        mod.has_impact_loc = added_uncommented_lines or deleted_uncommented_lines
        mod.save()
def get_NCOMM_NADEV_NNDEV_NSCTR(self, index: int, commits: List[MetricBean],
                                beans: MyMetricBeans,
                                my_commits: MyCommits) -> [int, int, int, int]:
    """Return ``[NCOMM, NADEV, NNDEV, NSCTR]`` for the file of ``commits[index]``.

    NCOMM counts commits touching the same file (including this one),
    NADEV counts author occurrences across those commits, NNDEV counts
    distinct authors, and NSCTR counts distinct packages they touched.
    """
    author_emails = []
    touched_packages = set()
    gr = GitRepository(self.repo_path)
    commit = gr.get_commit(commits[index].git_hash)
    commit_count = 1  # the commit under inspection counts as well
    for mod in commit.modifications:
        # TODO(review): the original marked this matching rule as
        # "This must be changed" — the pairing by new_path may be wrong.
        if mod.new_path is None or mod.new_path != commits[index].new_path:
            continue
        for past_hash in gr.get_commits_modified_file(mod.new_path):
            commit_count += 1
            past_commit = gr.get_commit(past_hash)
            author_emails.append(past_commit.author.email)
            for m in past_commit.modifications:
                path = m.old_path if m.new_path is None else m.new_path
                if path.endswith(m.filename):
                    # Strip "/<filename>" to obtain the package directory.
                    touched_packages.add(path[:-(len(m.filename) + 1)])
    return [commit_count, len(author_emails), len(set(author_emails)),
            len(touched_packages)]
def ublame_cli(filename, patterns):
    """Walk every commit that modified *filename* and diff it against *patterns*.

    :param filename: path (absolute or relative) of the file to blame
    :param patterns: patterns forwarded to ``diff_commit`` for matching
    """
    filename = os.path.abspath(filename)
    repo_path = repo_path_for(filename)
    # os.path.relpath is more robust than the previous
    # filename.split(repo_path)[-1].strip("/"): it handles trailing
    # separators and avoids accidental substring matches of the repo path.
    relative_filename = os.path.relpath(filename, repo_path)
    repo = GitRepository(repo_path)
    for commit_hash in repo.get_commits_modified_file(relative_filename):
        commit = repo.get_commit(commit_hash)
        diff_commit(commit, patterns)
def main():
    """Scan a git repository for Java methods that lost exactly one parameter.

    Usage: the repository path is taken from ``sys.argv[1]``; results are
    written to ``report.csv``.
    """
    print('Running...')
    # report_data: accumulates one row per detected signature change.
    report_data = []
    # Get the repo path from command line arguments.
    path = sys.argv[1]
    # Analyse every commit in the repo.
    git_repo = GitRepository(path)
    commits = git_repo.get_list_commits()
    for i, commit in enumerate(commits):
        for j, mod in enumerate(commit.modifications):
            diffs = git_repo.parse_diff(mod.diff)
            # Extract method info (name, signature, modifier, line, params)
            # from the added and deleted sides of the diff.
            add_func_name, add_func_sig, add_func_modifier, add_func_mod_row, add_func_params = get_func_info(diffs, 'added')
            del_func_name, del_func_sig, del_func_modifier, del_func_mod_row, del_func_params = get_func_info(diffs, 'deleted')
            # Find methods that removed a parameter, accounting for Java
            # overloading (same name, different parameter lists).
            # Case 1 - added method number <= deleted method number.
            for add_idx, add_func in enumerate(add_func_name):
                if add_func in del_func_name:
                    add_override_count = add_func_name.count(add_func)
                    del_override_count = del_func_name.count(add_func)
                    if (add_override_count <= del_override_count):
                        # Pair the added method with the nearest deleted one.
                        line_num = add_func_mod_row[add_idx]
                        del_idx = find_mod_func_pair(line_num, add_func, del_func_name, del_func_mod_row)
                        if (del_func_modifier[del_idx] == add_func_modifier[add_idx]):
                            # Old version must have exactly one more parameter...
                            if (len(del_func_params[del_idx]) == len(add_func_params[add_idx]) + 1):
                                # ...and contain every remaining parameter.
                                if (all(elem in del_func_params[del_idx] for elem in add_func_params[add_idx])):
                                    new_sig = add_func + add_func_sig[add_idx]
                                    old_sig = add_func + del_func_sig[del_idx]
                                    report_data.append([commit.hash, mod.filename, old_sig, new_sig])
            # Case 2 - added method number > deleted method number.
            for del_idx, del_func in enumerate(del_func_name):
                if del_func in add_func_name:
                    add_override_count = add_func_name.count(del_func)
                    del_override_count = del_func_name.count(del_func)
                    if (add_override_count > del_override_count):
                        line_num = del_func_mod_row[del_idx]
                        add_idx = find_mod_func_pair(line_num, del_func, add_func_name, add_func_mod_row)
                        if (del_func_modifier[del_idx] == add_func_modifier[add_idx]):
                            if (len(del_func_params[del_idx]) == len(add_func_params[add_idx]) + 1):
                                if (all(elem in del_func_params[del_idx] for elem in add_func_params[add_idx])):
                                    new_sig = del_func + add_func_sig[add_idx]
                                    old_sig = del_func + del_func_sig[del_idx]
                                    report_data.append([commit.hash, mod.filename, old_sig, new_sig])
    # Deduplicate rows, prepend the header and save the report to a CSV.
    report_data = list(set(tuple(element) for element in report_data))
    header = ['Commit SHA', 'Java File', 'Old function signature', 'New function signature']
    report_data.insert(0, header)
    with open('report.csv', 'w', newline='') as resultFile:
        wr = csv.writer(resultFile, dialect='excel')
        wr.writerows(report_data)
    print('Finish!')
def main():
    """Clone each recorded repository, check out its commit and record
    project/model commit information into the destination database.

    The working clone directory is removed after every repository,
    successful or not.
    """
    start = time.time()
    source_database = ""
    dst_database = ""
    path = "workdir"
    dest_project_database_controller = Project_commits_info_Controller(
        dst_database)
    dest_model_database_controller = Model_commits_info_Controller(
        dst_database)
    project_verbatim = Project_commits_verbatim_Controller(dst_database)
    model_verbatim = Model_commits_verbatim_Controller(dst_database)
    # Create the database connections.
    conn = create_connection(source_database)
    dst_conn = create_connection(dst_database)
    with dst_conn:
        processed_id, processed_mdl_name = get_id_name(dst_conn)
    # Repositories known to break processing; skipped entirely.  A set
    # replaces the original or-chain, which also listed the alesgraz
    # repository twice.
    skip_urls = {
        "https://github.com/alesgraz/kinect2-SDK-for-Simulink",
        "https://github.com/OpenCadd/Lego_nxt_car",
        "https://github.com/StefanMack/ProjSensSys",
        "https://github.com/chiloanel/UWMatlab",
    }
    with conn:
        id_urls = get_repo_id_urls(conn)
        for id_url in id_urls:
            # `repo_id` / `commit_hash` renamed from `id` / `hash`, which
            # shadowed the builtins.
            repo_id, url, model_files, commit_hash = id_url
            if not os.path.exists(path):
                os.mkdir(path)
            if url in skip_urls:
                continue
            try:
                if repo_id not in processed_id:
                    clone = "git clone " + url + " " + path
                    os.system(clone)  # Cloning
                    gr = GitRepository(path)
                    gr.checkout(commit_hash)
                    url = path
                    project_lifetime = write_project_commit_info(
                        url, repo_id, commit_hash,
                        dest_project_database_controller, project_verbatim)
                    write_model_commit_info(model_files, url, repo_id,
                                            commit_hash,
                                            dest_model_database_controller,
                                            project_lifetime, model_verbatim)
                else:
                    logging.info("Skipping . ALready Processed {}".format(repo_id))
            except Exception as e:
                logging.error(e)
                continue
            finally:
                # Always discard the clone so the next iteration starts clean.
                shutil.rmtree(path)
    end = time.time()
    logging.info("IT took {} seconds".format(end - start))
def main():
    """Copy every valid source file of a repo into data2/bug or data2/not_bug,
    depending on whether any commit touching it looks like a bug fix."""
    repo_path = sys.argv[1]
    repo_branch = 'master'
    repo_name = os.path.basename(os.path.normpath(repo_path))
    base_dir_not_bug = "data2/not_bug/{}".format(repo_name)
    base_dir_bug = "data2/bug/{}".format(repo_name)
    for out_dir in (base_dir_bug, base_dir_not_bug):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    gitRepo = GitRepository(repo_path)
    all_files = gitRepo.files()
    counter = 0
    total_files = len(all_files)  # kept for progress reporting
    for file in all_files:
        counter += 1
        relative_path = os.path.relpath(file, repo_path)
        print(relative_path)
        if not valid_source_file(file):
            continue
        # Walk the commits that touched this file; stop at the first one
        # whose message looks like a bug fix.
        buggy = False
        for commit in gitRepo.repo.iter_commits('--all', paths=relative_path):
            print(commit.hexsha)
            if is_buggy_commit(commit.message):
                print(commit.message)
                buggy = True
                break
        target_dir = base_dir_bug if buggy else base_dir_not_bug
        file_name_to_write = "{}_{}".format(counter, os.path.basename(file))
        shutil.copyfile(file, os.path.join(target_dir, file_name_to_write))
    print("All Done!")
def checkout_all_commit(self, repo_cfg: dict):
    """Check out every commit of the repo's report, oldest first, and run
    all configured detectors on each checkout.

    :param repo_cfg: repository config; its 'commit' key is updated in place
    """
    repo = GitRepository(path=self.cfg['paths']['repo'] + repo_cfg['name'])
    report = pd.read_csv(
        self.cfg['paths']['commit_report'] + repo_cfg['commit_file'],
        header=0)
    # Oldest commit first so detectors see the history in order.
    report.sort_values(ascending=True, inplace=True, by=['author_date'])
    for commit_hash in report['hash'].tolist():
        repo.checkout(commit_hash)
        repo_cfg['commit'] = commit_hash
        for detector in self.detectors:
            detector.run_on(repo_cfg)
def setUp(self):
    """Build a Dataset and a GitRepository for the cassandra test fixture."""
    name = "cassandra20200615"
    mode = "train"
    repositories = [{
        "name": name,
        "url": "",
        "CommitTarget": "",
        "filterFile": "",
        "codeIssueJira": "",
        "projectJira": ""
    }]
    option = Option({
        "name": name,
        "mode": mode,
        "repositories": repositories,
        "parameters": {}  # needless when to infer.
    })
    self.dataset = Dataset(option.getRepositorieImproved())
    self.repository = repositories[0]
    repo_dir = os.path.join(UtilPath.Test(), "testDataset",
                            self.repository["name"], "repository")
    print(repo_dir)
    self.gr = GitRepository(repo_dir)
def init(path=os.curdir):
    """Initialize enguard in a git repository."""
    io = FileIO()
    repo_path = GitRepository(path).path
    init_config(repo_path, io)
    install_hooks(repo_path, io)
def register_git_projects(conf: ConfigParser = None) -> None:
    """Register every project described in *conf* (or the crawler config).

    Sections named ``project.<x>`` either point at a gitlab group (remote)
    or at a local path whose matching directories are registered as local
    repositories.
    """
    conf = conf or crawler_config().conf
    gl = gitlab_api()
    # str.startswith replaces the original `s.find("project.") == 0`.
    for key in [s for s in conf.sections() if s.startswith("project.")]:
        section = conf[key]
        group, local_path = section.get("group"), section.get("local_path")
        if local_path and local_path[0] == "~":
            local_path = expanduser(local_path)
        # `glob_filter` renamed from `filter`, which shadowed the builtin.
        project_type, glob_filter = section.get("type", "MISC"), section.get("filter", "*")
        if local_path is None:
            # Remote project: get project info from gitlab.
            try:
                for proj in gl.groups.get(group).projects.list(
                    include_subgroups=True, as_list=False
                ):
                    register_remote_repository(proj, project_type)
            except GitlabGetError as e:
                current_app.logger.info(f"gitlab search {group} has some error {e}")
        else:
            for path in glob.glob(f"{local_path}/{glob_filter}", recursive=True):
                try:
                    # Only register paths that are real repos with history.
                    if GitRepository(path).total_commits() > 0:
                        register_local_repository(path, project_type)
                except InvalidGitRepositoryError:
                    current_app.logger.info(f"skipping non Git path {path}")
                except Exception:
                    current_app.logger.info(f"skipping invalid repository path {path}")
def create_some_commit(repo_path: str, file_name: str = "dummy.txt") -> None:
    """Write a one-line file into *repo_path* and commit it.

    :param repo_path: path of an existing git repository
    :param file_name: name of the file to create and commit
    """
    git_repo = GitRepository(repo_path).repo
    new_file = f"{repo_path}/{file_name}"
    with open(new_file, "w") as handle:
        handle.write("something\n")
    git_repo.index.add(new_file)
    git_repo.index.commit("some commit")
def prepare(self):
    """Collect Data entries for every ``*.mjava`` file of each repository
    and extend ``self.dataset`` with them."""
    for repository in self.repositories:
        datasetRepository = []
        # BUGFIX: the original executed `pass` here and then crashed in
        # GitRepository below when the path was missing; skip instead.
        if not os.path.exists(repository["path"]):
            continue
        gr = GitRepository(repository["path"])
        # All .mjava files under the repo matching the configured filter.
        pathsFile = [
            pathFile for pathFile in glob.glob(repository["path"] +
                                               "/**/*.mjava", recursive=True)
            if re.match(repository["filterFile"], pathFile)
        ]
        commitsBug = self.getCommitsBug(repository)
        with tqdm(
                pathsFile,
                bar_format=
                "{desc}: {percentage:3.0f}%|{bar:10}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]"
        ) as pbar:
            for pathFile in pbar:
                nameFile = os.path.basename(pathFile)
                pbar.postfix = nameFile
                pbar.desc = repository["name"]
                datasetRepository.append(
                    Data(gr, pathFile, commitsBug).getData())
        self.dataset.extend(datasetRepository)
def test_tabs():
    """Tab characters inside added lines must survive diff parsing."""
    diff = '\r\n'.join([
        '@@ -1,4 +1,17 @@',
        ' a',
        ' b',
        '-c',
        '+\td',
        '+cc',
        '+', '+', '+', '+', '+', '+', '+', '+', '+',
        '+\tg',
        '+',
        '+j',
        ' ',
    ])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']
    # Exactly one deletion: the old line 3.
    assert (3, 'c') in deleted
    assert 1 == len(deleted)
    # Tabs are preserved, blank additions are reported as empty strings.
    assert (3, '\td') in added
    assert (4, 'cc') in added
    for blank_line in range(5, 14):
        assert (blank_line, '') in added
    assert (14, '\tg') in added
    assert (15, '') in added
    assert (16, 'j') in added
    assert 14 == len(added)
def get_SCTR(self, index: int, commits: List[MetricBean],
             beans: MyMetricBeans, my_commits: MyCommits) -> int:
    """Return the structural scattering: the number of distinct packages
    touched by ``commits[index]`` when its recorded committer matches the
    commit's committer email."""
    touched_packages = set()
    gr = GitRepository(self.repo_path)
    commit = gr.get_commit(commits[index].git_hash)
    if commits[index].committer_email == commit.committer.email:
        for mod in commit.modifications:
            path = mod.old_path if mod.new_path is None else mod.new_path
            if path.endswith(mod.filename):
                # Strip "/<filename>" to obtain the package directory.
                touched_packages.add(path[:-(len(mod.filename) + 1)])
    return len(touched_packages)
async def _update_commits(
    self,
    git_repo: GitRepository,
    current_commits: MutableMapping,
    checkout_point: str,
) -> None:
    """Add to *current_commits* every commit on *checkout_point* that is
    not yet recorded, keyed by commit hash.

    :param git_repo: repository to list commits from
    :param current_commits: mutated in place; hash -> commit object
    :param checkout_point: branch name passed to ``get_list_commits``
    """
    for commit in git_repo.get_list_commits(branch=checkout_point):
        # Membership test directly on the mapping — the original's
        # `.keys()` call was redundant.
        if commit.hash not in current_commits:
            current_commits[commit.hash] = commit
def test_deletions():
    """A diff with a single removed line yields one deletion and no additions."""
    diff = '\r\n'.join([
        '@@ -2,6 +2,7 @@ aa',
        ' bb',
        ' cc',
        ' log.info("aa")',
        '-log.debug("b")',
        ' dd',
        ' ee',
        ' ff',
    ])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']
    assert (5, 'log.debug("b")') in deleted
    assert len(deleted) == 1
    assert len(added) == 0
def __init__(self, repoURL, first=None, second=None, fromCommit=None, since=None, to=None):
    """Create a repository miner and immediately run the analysis.

    The mining range is chosen from the argument combination, in order:
    - first + second (no dates): commit range
    - first only: single commit
    - since + to (no commits): datetime range (strings parsed with dateutil)
    - fromCommit: everything from that commit onward
    - otherwise: the whole history

    :raises Exception: when since/to cannot be parsed as datetimes
    """
    start = time.perf_counter()
    self.__gitRepo = GitRepository(repoURL)
    if first is not None and second is not None and since is None and to is None:
        self.repo = RepositoryMining(repoURL, from_commit=first, to_commit=second)
        self.__repo_type = RepoType.BETWEEN_COMMITS
    elif first is not None and second is None and since is None and to is None:
        self.repo = RepositoryMining(repoURL, single=first)
        self.__repo_type = RepoType.SINGLE_COMMIT
    elif first is None and second is None and since is not None and to is not None:
        try:
            date1 = parser.parse(since)
            date2 = parser.parse(to)
            self.repo = RepositoryMining(repoURL, since=date1, to=date2)
            self.__repo_type = RepoType.DATETIME
        except Exception:
            raise Exception("Entered Datetime is not valid.")
    elif fromCommit is not None:
        self.repo = RepositoryMining(path_to_repo=repoURL, from_commit=fromCommit)
        self.__repo_type = RepoType.FROM_COMMIT
    else:
        self.repo = RepositoryMining(path_to_repo=repoURL)
        self.__repo_type = RepoType.ALL
    print("repoMiner was created")
    # Accumulators filled by __GetModifications() below.
    self.__files = []  # number of analyzed files
    self.__files_with_methods = []
    self.__test_files = []  # number of test files
    self.__production_files = []  # number of production files
    self.__commits = []  # List[str] of analysed commits hash
    self.__commits_with_modified_methods = set(
    )  # List[str] of analysed commits with modified methods hash
    self.__production_methods = []  # List[ModifiedMethods]
    self.__test_methods = []  # List[ModifiedMethods]
    self.__modified_methods = []  # List[ModifiedMethods]
    self.__moved_files_without_changes = [
    ]  # List of files without changes
    self.__analyzed_commits = []  # List[AnalyzedCommits]
    self.__matched_files = []  # List of matched files
    self.__not_matched_files = None  # instance of NotMatchedFiles
    self.__GetModifications()  # performs analysis
    end = time.perf_counter()
    # Elapsed wall-clock time in minutes, formatted with two decimals.
    self.__analyse_time = "{:.2f}".format(
        (end - start) / 60)  # analysis performing time
def find_occurence_in_commit(commit, word, file):
    """Count modifications of *commit* containing *word* and write a small
    report into *file*.

    NOTE(review): this function parses ``word`` itself as a diff (not
    ``m.diff``) and opens a hard-coded repository ('test-repos/test1');
    both look wrong — confirm intent before relying on the counts.
    """
    conditional_added = 0
    commit_with_conditional = []
    commit_with_removed_conditional = []
    for m in commit.modifications:
        # Only modifications whose full source contains the search word.
        if (str(m.source_code).find(word) != -1):
            file.write("************** date : " + str(commit.committer_date) +
                       "*****************\n")
            # NOTE(review): `diff` is set to the search word, so parse_diff
            # never sees the modification's real diff. TODO confirm.
            diff = word
            gr = GitRepository('test-repos/test1')
            parsed_lines = gr.parse_diff(diff)
            if (len(parsed_lines['added']) > 0):
                conditional_added = conditional_added + len(
                    parsed_lines['added'])
                lines = str(m.source_code).splitlines()
                commit_with_conditional.append(m.new_path)
                # Report every source line containing the word as added.
                for line in lines:
                    if line.find(word) != -1:
                        file.write("\t\tligne ajouté : {}\n".format(line))
            if (len(parsed_lines['deleted']) > 0):
                # NOTE(review): deletions are added to the same counter as
                # additions — presumably intentional, but verify.
                conditional_added = conditional_added + len(
                    parsed_lines['deleted'])
                lines = str(m.source_code).splitlines()
                commit_with_removed_conditional.append(m.new_path)
                for line in lines:
                    if line.find(word) != -1:
                        file.write("\t\tligne retiré : {}\n".format(line))
    if (len(commit_with_conditional) > 0):
        file.write(str(commit_with_conditional) + "\n\n")
    return conditional_added
def test_real_example():
    """A realistic multi-hunk diff is parsed into the right added/deleted sets."""
    diff = '\r\n'.join([
        '@@ -72,7 +72,7 @@ public class GitRepository implements SCM {',
        ' ',
        ' private static Logger log = Logger.getLogger(GitRepository.class);',
        ' ',
        '- public GitRepository(String path) {',
        '+ public GitRepository2(String path) {',
        ' this.path = path;',
        ' this.maxNumberFilesInACommit = checkMaxNumberOfFiles();',
        ' this.maxSizeOfDiff = checkMaxSizeOfDiff();',
        '@@ -155,7 +155,7 @@ public class GitRepository implements SCM {',
        ' return git.getRepository().getBranch();',
        ' }',
        ' ',
        '- public ChangeSet getHead() {',
        '+ public ChangeSet getHead2() {',
        ' Git git = null;',
        ' try {',
        ' git = openRepository();',
        '@@ -320,6 +320,7 @@ public class GitRepository implements SCM {',
        ' ',
        ' return diffs;',
        ' }',
        '+ newline',
        ' ',
        ' private void setContext(DiffFormatter df) {',
        ' String context = System.getProperty("git.diffcontext");',
    ])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']
    # Line numbers are absolute per hunk header (@@ -72, -155, +320).
    assert (75, ' public GitRepository(String path) {') in deleted
    assert (158, ' public ChangeSet getHead() {') in deleted
    assert 2 == len(deleted)
    assert (75, ' public GitRepository2(String path) {') in added
    assert (158, ' public ChangeSet getHead2() {') in added
    assert (323, ' newline') in added
    assert 3 == len(added)
def test_ignore_add_whitespaces_and_modified_normal_line():
    """skip_whitespaces=True must drop whitespace-only changes from the diff."""
    repo_path = 'test-repos/test14'
    sha = "52716ef1f11e07308b5df1b313aec5496d5e91ce"
    gr = GitRepository(repo_path)

    def only_commit(**kwargs):
        # Mine exactly one commit, optionally skipping whitespace changes.
        return list(
            RepositoryMining(repo_path, single=sha,
                             **kwargs).traverse_commits())[0]

    commit = only_commit()
    assert len(commit.modifications) == 1
    parsed_normal_diff = gr.parse_diff(commit.modifications[0].diff)

    commit = only_commit(skip_whitespaces=True)
    assert len(commit.modifications) == 1
    parsed_wo_whitespaces_diff = gr.parse_diff(commit.modifications[0].diff)

    # One added and one deleted line are whitespace-only; they disappear.
    assert len(parsed_normal_diff['added']) == 2
    assert len(parsed_wo_whitespaces_diff['added']) == 1
    assert len(parsed_normal_diff['deleted']) == 1
    assert len(parsed_wo_whitespaces_diff['deleted']) == 0
def test_diff_no_newline():
    """
    When a file ends without a newline, git marks this in diffs with the
    extra line ``\\ No newline at end of file``. Assert that such diffs
    are still parsed correctly.
    """
    gr = GitRepository('test-repos/no_newline')
    mod = gr.get_commit(
        '52a78c1ee5d100528eccba0a3d67371dbd22d898').modifications[0]
    parsed = mod.diff_parsed
    # 'test1' appears on both sides: removed as the old newline-less last
    # line, then re-added with a proper trailing newline.
    assert (1, 'test1') in parsed['deleted']
    assert (1, 'test1') in parsed['added']
    assert (2, 'test2') in parsed['added']
    gr.clear()
def run_for_one_project(db_name, project_name, repo_path, thread_id=0):
    """Scan one project's Java history for bug-fix commits and persist
    bug-fix / bug-inducing information into the database.

    :param db_name: database to write into
    :param project_name: repository directory name under *repo_path*
    :param repo_path: parent directory containing the repository
    :param thread_id: identifier used only in progress output
    """
    db_obj = db.DB()
    db_obj.set_db_name(db_name)
    cursor, conn = db_obj.connect_mysql()
    gr = GitRepository('{}/{}'.format(repo_path, project_name))
    totalCommits = gr.total_commits()
    count = 0
    try:
        for commit in RepositoryMining(
                '{}/{}'.format(repo_path, project_name),
                only_modifications_with_file_types=['.java']).traverse_commits():
            # Lower-case message for case-insensitive keyword matching.
            msg = commit.msg.lower()
            for key in bugs:
                if key in msg:
                    bugfix = bug.BugFix(db_obj, cursor, conn, commit.hash,
                                        project_name, msg, key,
                                        commit.committer_date,
                                        commit.author.name,
                                        commit.author.email)
                    bugfix.insert_into_database()  # insert bugfix
                    for modified_file in commit.modifications:
                        if modified_file.filename.endswith('.java'):
                            churn = modified_file.added + modified_file.removed
                            bug_fix_file = bug.BugFixFile(
                                db_obj, cursor, conn, commit.hash,
                                modified_file.new_path, churn)
                            bug_induce_commits = gr.get_commits_last_modified_lines(
                                commit, modified_file)
                            induced = bug_induce_commits.get(modified_file.new_path)
                            bugfix.set_induce_commits(induced)
                            bugfix.insert_into_bug_fix_induce()  # insert bug fix induce
                            bug_fix_file.insert_into_database()  # insert bug fix file
                            # Explicit None check replaces the original bare
                            # `except:` that silently absorbed ALL errors here.
                            if induced is not None:
                                for ind_commit in induced:
                                    getDetailsOfInduceCommit(
                                        db_obj, cursor, conn, project_name,
                                        gr, ind_commit)
                            else:
                                print("no induce commits found")
                    # Only the first matching bug keyword counts per commit.
                    break
            count += 1
            if count % 100 == 0:
                print("Thread {}: Done processing: {} {}/{}".format(
                    thread_id, commit.hash, count, totalCommits))
        db_obj.close_connection(conn)
    except Exception as e:
        # Narrowed from a bare `except:`; also report what went wrong.
        print("Exception occured: {}".format(e))
def computeRepoMetrics(repoPath):
    """Compute all metrics for a single repository.

    analyzeCommits collects interval-related metrics, while
    calculateStructAndSemanticScattering yields file pairs with their
    semantic and structural differences; both feed the parallel
    metric-processing stage.
    """
    print("STARTED:" + repoPath)
    repo = GitRepository(repoPath)
    parallelMetricProcessing(
        analyzeCommits(repoPath, repo),
        calculateStructAndSemanticScattering(repo),
        30,
        repo)
    print("COMPLETED:" + repoPath)
def test_diff_histogram():
    """The histogram diff algorithm produces different (finer) hunks than
    the default Myers diff for the same commit."""
    # Without histogram: default diff algorithm.
    commit = list(
        RepositoryMining('test-repos/test13',
                         single="93df8676e6fab70d9677e94fd0f6b17db095e890").
        traverse_commits())[0]
    mod = commit.modifications[0]
    gr = GitRepository('test-repos/test13')
    diff = gr.parse_diff(mod.diff)
    assert len(diff['added']) == 11
    assert (3, ' if (path == null)') in diff['added']
    assert (5, ' log.error("Icon path is null");') in diff['added']
    assert (6, ' return null;') in diff['added']
    assert (8, '') in diff['added']
    assert (9, ' java.net.URL imgURL = GuiImporter.class.getResource(path);'
            ) in diff['added']
    assert (10, '') in diff['added']
    assert (11, ' if (imgURL == null)') in diff['added']
    assert (12, ' {') in diff['added']
    assert (14, ' return null;') in diff['added']
    assert (16, ' else') in diff['added']
    assert (17, ' return new ImageIcon(imgURL);') in diff['added']
    assert len(diff['deleted']) == 7
    assert (3, ' java.net.URL imgURL = GuiImporter.class.getResource(path);'
            ) in diff['deleted']
    assert (4, '') in diff['deleted']
    assert (5, ' if (imgURL != null)') in diff['deleted']
    assert (7, ' return new ImageIcon(imgURL);') in diff['deleted']
    assert (9, ' else') in diff['deleted']
    assert (10, ' {') in diff['deleted']
    assert (13, ' return null;') in diff['deleted']
    # With histogram: same commit mined with histogram_diff=True groups the
    # changes differently, so the parsed line numbers/contents change.
    commit = list(
        RepositoryMining('test-repos/test13',
                         single="93df8676e6fab70d9677e94fd0f6b17db095e890",
                         histogram_diff=True).traverse_commits())[0]
    mod = commit.modifications[0]
    gr = GitRepository('test-repos/test13')
    diff = gr.parse_diff(mod.diff)
    assert (4, ' {') in diff["added"]
    assert (5, ' log.error("Icon path is null");') in diff["added"]
    assert (6, ' return null;') in diff["added"]
    assert (7, ' }') in diff["added"]
    assert (8, '') in diff["added"]
    assert (11, ' if (imgURL == null)') in diff["added"]
    assert (12, ' {') in diff["added"]
    assert (13, ' log.error("Couldn\'t find icon: " + imgURL);'
            ) in diff["added"]
    assert (14, ' return null;') in diff["added"]
    assert (17, ' return new ImageIcon(imgURL);') in diff["added"]
    assert (6, ' {') in diff["deleted"]
    assert (7, ' return new ImageIcon(imgURL);') in diff["deleted"]
    assert (10, ' {') in diff["deleted"]
    assert (11, ' log.error("Couldn\'t find icon: " + imgURL);'
            ) in diff["deleted"]
    assert (12, ' }') in diff["deleted"]
    assert (13, ' return null;') in diff["deleted"]
def checkout_refactored_commit(self, repo_cfg: dict, call_back: callable):
    """Check out every commit involved in a refactoring report and run
    *call_back* on each checkout, resuming commits already processed.

    :param repo_cfg: repository config; 'commit' is set before each callback
    :param call_back: callable invoked with the updated repo_cfg
    """
    gr = GitRepository(path=self.cfg['paths']['repo'] + repo_cfg['name'])
    try:
        gr.reset()
    except Exception:
        # Narrowed from a bare `except:`; a failed reset is non-fatal here.
        pass
    df = pd.read_csv(self.cfg['paths']['commit_report'] + repo_cfg['name'] +
                     "_refactored.csv",
                     header=0)
    commits = df['commit'].tolist()
    previous = df['previous'].tolist()
    finished_commit = []
    # Resume support: commits that already produced a smell report file
    # (named "<id>_<type>_<commit>.csv") are skipped.
    for file in os.listdir(self.cfg['paths']['smell_report'] +
                           repo_cfg['name']):
        if re.match(r'\d+\_\w+\_\w+\.csv', file):
            file_id, cs_type, commit = file.split('_')
            finished_commit.append(commit.replace('.csv', ''))
    # Merge both commit columns, dropping NaN cells from missing values.
    unique_commit = set(commits + previous) - {np.nan}
    unique_commit -= set(finished_commit)
    cpt = 0
    cpt_total = len(unique_commit)
    for commit in unique_commit:
        cpt += 1
        print(f"{commit}\t\t\t{repo_cfg['name']}\t{cpt}/{cpt_total}")
        gr.checkout(commit)
        print('\t checkout -done!')
        repo_cfg['commit'] = commit
        call_back(repo_cfg)
def test_extract_line_number_and_content():
    """Every replaced line keeps its number on both sides of the diff."""
    old_lines = ['a', 'b', 'c', 'log.info("a")', 'd', 'e', 'f']
    new_lines = ['aa', 'bb', 'cc', 'log.info("aa")', 'dd', 'ee', 'ff']
    diff = "\r\n".join(
        ["@@ -1,8 +1,8 @@"]
        + ['-' + line for line in old_lines]
        + ['+' + line for line in new_lines]
        + [" "])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']
    # Deleted lines are numbered against the old file, added against the new.
    for number, content in enumerate(old_lines, start=1):
        assert (number, content) in deleted
    for number, content in enumerate(new_lines, start=1):
        assert (number, content) in added
def __get_bug_introducing_commits(self, path_to_repo, from_commit, to_commit):
    """SZZ-style search: map each file to the set of commit hashes that
    last modified the lines touched by a bug-fix commit.

    :param path_to_repo: repository to analyse
    :param from_commit: first commit of the analysed range
    :param to_commit: last commit of the analysed range
    :returns: dict mapping file path -> set of candidate bug-introducing hashes
    """
    bug_introducing_commit_hashes = {}
    # Hoisted out of the loop: the original built a fresh GitRepository for
    # every bug-fix commit, repeating repository initialisation needlessly.
    git_repo = GitRepository(path_to_repo)
    for commit in RepositoryMining(path_to_repo=path_to_repo,
                                   from_commit=from_commit,
                                   to_commit=to_commit,
                                   reversed_order=False).traverse_commits():
        if self.__is_message_bugfix(commit.msg):
            bug_introducing = git_repo.get_commits_last_modified_lines(commit)
            for file, hashes in bug_introducing.items():
                # Accumulate across all bug-fix commits touching this file.
                bug_introducing_commit_hashes.setdefault(file, set()).update(hashes)
    return bug_introducing_commit_hashes
def _drill(self):
    """Parse all commits in the configured date range.

    :returns: a tuple of (list of wrapped Commit objects, the pydriller
        GitRepository for the configured repo)
    """
    rep_obj = GitRepository(self.config.REPO)
    miner = RepositoryMining(self.config.REPO,
                             since=self.config.START_DATE,
                             to=self.config.END_DATE)
    commits = [Commit(c) for c in miner.traverse_commits()]
    return commits, rep_obj
def clone_repo(user, repo_name):
    """Clone ``user/repo_name`` into ``tmp_dir/<repo_name>`` and return a
    GitRepository for it, or None when the clone fails.

    (Original comment, translated: the repository is copied into the local
    path tmp_dir/repo_name and returned as output.)
    """
    try:
        # SECURITY: credentials are hard-coded into the clone URL (and
        # printed below); move them to configuration or the environment.
        url = "https://{}:{}@github.com/{}/{}".format(
            'gastige', 'Sistemi.Distribuiti.2', user, repo_name)
        print("git clone {}".format(url))
        repo_name = get_repo_name_from_url(url)
        path = tmp_dir / repo_name
        # Remove any leftover clone from a previous run.
        if os.path.exists(path):
            clean_tmp_dir(path)
        Repo.clone_from(url, path, progress=Progress())
        print("\nRepository {} clonata.".format(repo_name))
        return GitRepository(path)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any clone failure yields None.
        return None