def run_for_one_project(db_name, project_name, repo_path, thread_id=0):
    db_obj = db.DB()
    db_obj.set_db_name(db_name)
    cursor, conn = db_obj.connect_mysql()
    gr = GitRepository('{}/{}'.format(repo_path, project_name))
    totalCommits = gr.total_commits()
    count = 0
    try:
        for commit in RepositoryMining('{}/{}'.format(repo_path, project_name),
                                       only_modifications_with_file_types=['.java']).traverse_commits():
            msg = commit.msg.lower()  # convert the commit message to lower case
            for key in bugs:
                if key in msg:
                    # BugFix(db_name, commit_id, project_name, message, identification_key,
                    #        commit_date, author_name, author_email)
                    bugfix = bug.BugFix(db_obj, cursor, conn, commit.hash, project_name, msg, key,
                                        commit.committer_date, commit.author.name, commit.author.email)
                    bugfix.insert_into_database()  # insert bug fix
                    for modified_file in commit.modifications:
                        if modified_file.filename.endswith('.java'):
                            churn = modified_file.added + modified_file.removed
                            bug_fix_file = bug.BugFixFile(db_obj, cursor, conn, commit.hash,
                                                          modified_file.new_path, churn)
                            # SZZ: commits that last touched the lines modified by this fix
                            bug_induce_commits = gr.get_commits_last_modified_lines(commit, modified_file)
                            bugfix.set_induce_commits(bug_induce_commits.get(modified_file.new_path))
                            # at this point the bug fix and its modified files can be inserted
                            bugfix.insert_into_bug_fix_induce()  # insert bug fix induce
                            bug_fix_file.insert_into_database()  # insert bug fix file
                            try:
                                for ind_commit in bug_induce_commits.get(modified_file.new_path):
                                    getDetailsOfInduceCommit(db_obj, cursor, conn, project_name, gr, ind_commit)
                            except TypeError:
                                print("no induce commits found")
                    break
            count = count + 1
            if count % 100 == 0:
                print("Thread {}: Done processing: {} {}/{}".format(thread_id, commit.hash, count, totalCommits))
        db_obj.close_connection(conn)
    except Exception:
        print("Exception occurred")
if "solves" in s: solves += 1 if "solved" in s: solved += 1 if "except" in s: exceptX += 1 # # print(Commit.msg) # get files code modifications # == displays all code modifications # print(modified_file.diff) # to facilitate the parsing of code modifications, we use the GitRepository Class in line 3 # diff =modified_file.diff # print(diff) # parsed_diff= repos.parse_diff(diff) # pprint(parsed_diff) # GET ALL BUGGY COMMITS bug_inducing_commits = repos.get_commits_last_modified_lines(Commit, modified_file) # print(bug_inducing_commits) BuggyCommits = bug_inducing_commits # print(BuggyCommits) # x = re.search("(?<=\')(.*?)(?=\')", str(BuggyCommits)) # print(x) # match=re.findall("(?<=\')(.*?)(?=\')", str(BuggyCommits)) match = re.findall("(?<=\ ')[a-zA-Z0-9 ]*", str(BuggyCommits)) # print(match) match1 = re.findall("(?<=\ {')[a-zA-Z0-9 ]*", str(BuggyCommits)) # print(match1) NbBugs = len(match) + len(match1) #print("Number of buggy commits before and after introduction of smells= " + str(NbBugs))
def miner():
    repo_path = os.path.abspath(working_path + repo_name)

    # Clone if necessary
    if not os.path.exists(repo_path):
        print("Cloning: {}".format(repo_name))
        for c in RepositoryMining(repo_git,
                                  clone_repo_to=os.path.abspath(working_path)).traverse_commits():
            pass
    else:
        print("{} clone done!".format(repo_name))

    # Extract FIX and BIC
    bic_csv = os.path.abspath(working_path + repo_name + "_bic.csv")
    header = [
        "hash", "path", "size", "developer", "fix", "bic_path", "bic_hash",
        "bic_size"
    ]
    no_fix_count = fix_count = 0
    if not os.path.exists(bic_csv):
        print("Extracting FIX and BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        to_date = datetime(2017, 6, 1, 12, 0, 0)
        gr = GitRepository(repo_path)
        gr2 = GitRepository(repo_path)
        for commit in RepositoryMining(repo_path, to=to_date,
                                       reversed_order=True).traverse_commits():
            msg = commit.msg.lower()
            mods = commit.modifications
            if any(word in msg for word in keywords):
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.author.email,
                    "fix": True
                }
                fix_count += 1
                for mod in mods:
                    dout["path"] = mod.new_path
                    bics_per_mod = gr.get_commits_last_modified_lines(commit, mod)
                    for bic_path, bic_commit_hashs in bics_per_mod.items():
                        dout["bic_path"] = bic_path
                        for bic_commit_hash in bic_commit_hashs:
                            bic = gr2.get_commit(bic_commit_hash)
                            dout["bic_hash"] = bic_commit_hash
                            dout["bic_size"] = len(bic.modifications)
                            writer.writerow(dout)
                            out_file.flush()
                if len(mods) == 0:
                    dout["path"] = dout["bic_path"] = dout["bic_hash"] = dout["bic_size"] = "---"
                    writer.writerow(dout)
                    out_file.flush()
            else:
                no_fix_count += 1
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.committer.email,
                    "fix": False,
                    "bic_path": "---",
                    "bic_hash": "---",
                    "bic_size": "---"
                }
                for mod in mods:
                    dout["path"] = mod.new_path
                    writer.writerow(dout)
                    out_file.flush()
                if len(mods) == 0:
                    dout["path"] = "---"
                    writer.writerow(dout)
                    out_file.flush()
        out_file.close()
    else:
        print("Extracting FIX and BIC done!")

    # Get unique BIC
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    unique_devs = set()
    unique_commits = set()
    fixes = set()
    unique_bics = set()
    unique_fics = set()
    for row in reader:
        unique_commits.add(row["hash"])
        if row["path"].endswith(tuple(extensions)):
            unique_devs.add(row["developer"])
            unique_bics.add(row["bic_hash"])
            unique_fics.add(row["bic_path"])
            if row["fix"] == "True":
                fixes.add(row["hash"])
    unique_bics.remove("---")
    unique_fics.remove("---")
    in_file.close()
    print("Developers: {}, Commits: {} Defective: {} {} {}".format(
        len(unique_devs), len(unique_commits), len(fixes), no_fix_count, fix_count))

    # Count fully and partially defective commits, and defective files in defective commits
    bic_csv = os.path.abspath(working_path + repo_name + "_partial_bic.csv")
    header = ["bic_hash", "bic_size", "bic_file_size", "bic_path", "defective"]
    if not os.path.exists(bic_csv):
        print("Counting partial BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        gr = GitRepository(repo_path)
        for bic_hash in unique_bics:
            commit = gr.get_commit(bic_hash)
            diff = count_file = len(commit.modifications)
            dout = {
                "bic_hash": bic_hash,
                "bic_size": len(commit.modifications)
            }
            for mod in commit.modifications:
                if mod.filename.endswith(tuple(extensions)):
                    dout["bic_path"] = mod.new_path
                    dout["bic_file_size"] = mod.nloc
                    if mod.new_path in unique_fics:
                        diff -= 1
                        dout["defective"] = True
                    else:
                        dout["defective"] = False
                    writer.writerow(dout)
                    out_file.flush()
                else:
                    count_file -= 1
                    diff -= 1
        out_file.close()
    else:
        print("Counting partial BIC done!")

    # Calculate partially defective commits
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    bics = {}
    fully_defective = partially_defective = 0
    partially_defective_files = total_defective_files = 0
    for row in reader:
        if row["bic_path"].endswith(tuple(extensions)):
            if row["bic_hash"] in bics:
                bics[row["bic_hash"]].append(row["defective"])
            else:
                bics[row["bic_hash"]] = [row["defective"]]
    for key, value in bics.items():
        count_defective_files = value.count("True")
        if len(value) > 1:
            total_defective_files += len(value)
            if len(value) == count_defective_files or count_defective_files == 1:
                fully_defective += 1
            else:
                partially_defective += 1
                partially_defective_files += count_defective_files
    ratio_defective_files_in_defective_commits = round(
        (partially_defective_files / total_defective_files) * 100, 1)
    ratio_partially_defective_commits = round(
        (partially_defective / (fully_defective + partially_defective)) * 100, 1)
    print(
        "Partially def. commits: {}%. Defective files in partially def. commits: {}%"
        .format(ratio_partially_defective_commits,
                ratio_defective_files_in_defective_commits))
                        fieldnames=header)
writer.writeheader()

# Perform Git blame to retrieve the list of BIC commits
gr = GitRepository(args.repo)
fixes = csv.DictReader(open(args.csv, 'r', newline='', encoding="utf-8"),
                       delimiter=args.delimiter)
count = 0
for fix in fixes:
    git_hash = fix['git_hash']
    print('{}) Processing {} '.format(count, git_hash))
    fix_commit = gr.get_commit(git_hash)
    for mod in fix_commit.modifications:
        if mod.filename.endswith('.cpp'):
            if args.notuse:
                bic_mods = gr.get_commits_last_modified_lines(
                    fix_commit, mod, hashes_to_ignore_path=args.notuse)
            else:
                bic_mods = gr.get_commits_last_modified_lines(fix_commit, mod)
            print(' ==> {} has {} MOD, {} BIC'.format(
                git_hash, len(bic_mods), get_bic_count(bic_mods)))
            dout = {
                'git_timestamp': fix_commit.committer_date,
                'git_modifications': len(fix_commit.modifications),
                'git_methods': get_method_count(fix_commit.modifications),
                'bic_count': len(bic_mods)
            }
            # Append the ancillary data contained in the input file
            for ic in input_columns:
                dout[ic] = fix[ic]
def findBugCausingCommits(projectMap, local_repos_directory, output_directory):
    bugInducingProjectMap = {}
    for project, commits in projectMap.items():
        print("finding bug causing commits for ",
              str(local_repos_directory) + "/" + project)
        if (os.path.exists(str(output_directory) + "/" + str(project) + "_bug_causing_commits")
                and os.path.isfile(str(output_directory) + "/" + str(project) + "_bug_causing_commits")):
            print(project, "already analyzed, skipping...")
            continue
        repo_path = str(local_repos_directory) + "/" + project
        repo = GitRepository(repo_path)
        startTime = time.time()
        bugInducingCommits = []
        hashes = [x["commit_hash"] for x in commits]
        try:
            # analyze each bug fix for this project
            for bugFix in RepositoryMining(repo_path, only_commits=hashes).traverse_commits():
                # get the commits that last touched the modified lines of the files
                commitsLastTouchedFix = repo.get_commits_last_modified_lines(bugFix)
                bugCausingHashes = set([])
                for filename, fileCommit in commitsLastTouchedFix.items():
                    for fileHash in fileCommit:
                        bugCausingHashes.add(fileHash)
                hashList = [x for x in bugCausingHashes]
                # collect statistics about each bug-causing commit:
                # number of files modified, lines added, lines removed, number of
                # methods changed, author, elapsed time until the bug fix, branches
                for bugCausingCommit in RepositoryMining(repo_path, only_commits=hashList).traverse_commits():
                    numModifiedFiles = len(bugCausingCommit.modifications)
                    linesAdded = 0
                    linesRemoved = 0
                    numMethodsChanged = 0
                    sum_nloc = 0
                    numFilesWithComplexity = 0
                    sumComplexity = 0
                    if numModifiedFiles <= 0:
                        continue
                    for modification in bugCausingCommit.modifications:
                        sourceCodeLanguage = LanguageDetector.detect(modification.filename)
                        if (sourceCodeLanguage is None or modification.nloc == 0
                                or modification.nloc is None):
                            continue
                        sum_nloc = sum_nloc + modification.nloc
                        linesAdded = linesAdded + modification.added
                        linesRemoved = linesRemoved + modification.removed
                        numMethodsChanged = numMethodsChanged + len(modification.changed_methods)
                        if modification.complexity:
                            numFilesWithComplexity = numFilesWithComplexity + 1
                            sumComplexity = sumComplexity + modification.complexity
                    averageComplexityFixedFiles = 0
                    if numFilesWithComplexity != 0:
                        averageComplexityFixedFiles = sumComplexity / numFilesWithComplexity
                    bugInducingInfo = {
                        "commit_hash": bugCausingCommit.hash,
                        "author": bugCausingCommit.author.name,
                        "total_complexity": sumComplexity,
                        "average_complexity": averageComplexityFixedFiles,
                        "sum_nloc": sum_nloc,
                        "num_files": numModifiedFiles,
                        "lines_added": linesAdded,
                        "lines_removed": linesRemoved,
                        "commit_date": bugCausingCommit.author_date,
                        "branches": bugCausingCommit.branches,
                        "num_methods_changed": numMethodsChanged,
                        "time_to_fix": bugFix.author_date - bugCausingCommit.author_date
                    }
                    bugInducingCommits.append(bugInducingInfo)
            tempMap = {project: bugInducingCommits}
            IOUtils.writeBugMap(tempMap, output_directory, "_bug_causing_commits")
            endTime = time.time()
            print("time", endTime - startTime)
        except Exception:
            print("FAILED FOR", project)
def main():
    repo_path = sys.argv[1]
    repo_branch = 'master'
    commits = RepositoryMining(repo_path, only_in_branch=repo_branch).traverse_commits()
    commits = [commit for commit in commits]
    gitRepo = GitRepository(repo_path)
    start_date = commits[0].committer_date + relativedelta(years=3)
    last_date = commits[-1].committer_date - relativedelta(years=3)
    bug_tracker = defaultdict(list)
    bug_tracker_pickle = "data3/{}.pickle".format(
        os.path.basename(os.path.normpath(repo_path)))

    # First index the buggy files
    if os.path.exists(bug_tracker_pickle):
        with open(bug_tracker_pickle, 'rb') as handle:
            bug_tracker = pickle.load(handle)
    else:
        for commit_index, commit in enumerate(commits):
            if not is_bugfix_commit(commit.msg):
                continue
            try:
                for m in commit.modifications:
                    if not valid_source_file(m.filename):
                        continue
                    bug_commit = gitRepo.get_commits_last_modified_lines(commit, m)  # uses SZZ
                    # if bug_commit == {}: continue
                    bug_start_index = 99999999999999999999
                    for _file in bug_commit:
                        for i, _commit in enumerate(commits[:commit_index]):
                            if _commit.hash in bug_commit[_file] \
                                    and i < bug_start_index:
                                bug_start_index = i
                    for _commit in commits[bug_start_index:commit_index]:
                        bug_tracker[_commit.hash].append(m.filename)
            except Exception as e:
                print("[***]", e)
                print(traceback.format_exc())
                print("Continuing for next commits")
        print(len(bug_tracker.keys()))
        with open(bug_tracker_pickle, 'wb') as handle:
            pickle.dump(bug_tracker, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # Copy the files
    with open('maj_versions/{}.hash'.format(
            os.path.basename(os.path.normpath(repo_path)))) as f:
        major_releases = []
        for line in f.read().splitlines():
            tag, hash = line.split(',')
            major_releases.append((tag, hash))
    for version, commit in enumerate(commits):
        if commit.hash not in [item[1] for item in major_releases]:
            continue
        if commit.committer_date < start_date or commit.committer_date > last_date:
            continue
        for tag, hash in major_releases:
            if hash == commit.hash:
                break
        print("[*] Doing {}".format(tag))
        gitRepo.checkout(commit.hash)
        base_dir_not_bug = "data3/{}/{}/not_bug".format(
            os.path.basename(os.path.normpath(repo_path)), tag)
        base_dir_bug = "data3/{}/{}/bug".format(
            os.path.basename(os.path.normpath(repo_path)), tag)
        if not os.path.exists(base_dir_bug):
            os.makedirs(base_dir_bug)
        if not os.path.exists(base_dir_not_bug):
            os.makedirs(base_dir_not_bug)
        all_files = gitRepo.files()
        for _file in all_files:
            if not valid_source_file(_file):
                continue
            filename = os.path.basename(os.path.normpath(_file))
            if commit.hash in bug_tracker and filename in bug_tracker[commit.hash]:
                file_path_to_write = os.path.join(base_dir_bug, filename)
            else:
                file_path_to_write = os.path.join(base_dir_not_bug, filename)
            shutil.copyfile(_file, file_path_to_write)
    print("All Done!")
index = 0
for commit in RepositoryMining(project).traverse_commits():
    print("{} : {}".format(index, commit.hash))
    index += 1
    flag = 0
    bug_msg = ""
    added = 0
    deleted = 0
    for modified in commit.modifications:
        added += modified.added
        deleted += modified.removed
    total_added += added
    total_deleted += deleted
    for bug in bugs:
        if bug in commit.msg.lower():
            flag = 1
            bug_msg = "{}{} , ".format(bug_msg, bug)
            added_buggy += added
            deleted_buggy += deleted
    if flag == 1:
        total_buggy_commits += 1
        buggy_commits = gr.get_commits_last_modified_lines(commit)
        inducing_commits = 0
        inducing_commits += len(buggy_commits)
        data_writer.writerow(['', commit.hash, commit.author.name, commit.author.email,
                              commit.author_date, bug_msg, commit.msg, "",
                              '{}'.format(inducing_commits), '{}'.format(buggy_commits)])
        print("{} : {} - {}".format(total_buggy_commits, commit.hash, buggy_commits))
        total_inducing_commits += inducing_commits

data_writer2.writerow([project, '{}'.format(index), '{}'.format(total_buggy_commits),
                       '{}'.format(total_inducing_commits), '{}'.format(total_added),
                       '{}'.format(total_deleted), '{}'.format(added_buggy),
                       '{}'.format(deleted_buggy)])
data_file.close()
data_file2.close()
vals = np.array(list(bug_dict.values()))
# count = 0
for i in vals:
    # print("How many element in the list? \n", len(i))
    if len(i) == 1:
        for candidate_commit in RepositoryMining("~/openstack", single=i).traverse_commits():
            print("cand sha is: ", candidate_commit.hash)
            print("cand msg is: ", candidate_commit.msg)
            for modified_files in candidate_commit.modifications:
                print("Modified this file : ", modified_files.filename)
                diff = modified_files.diff
                parsed_diff = gr.parse_diff(diff)
                print("This is the usual diff: {}".format(diff))
                buggy_induced_commits = gr.get_commits_last_modified_lines(
                    candidate_commit, modified_files)
                print("This is a bug inducing commit : ", buggy_induced_commits)
                pprint("Parsed diff {} :".format(parsed_diff))
                # "This is the diff of the file : {}".format(parsed_diff)
    else:
        for x in i:
            for cand_commit in RepositoryMining("~/openstack/", single=x).traverse_commits():
                print("cand sha is: {}".format(cand_commit.hash))
                print("cand msg is: {}".format(cand_commit.msg))
                for modified_files in cand_commit.modifications:
                    print("Modified this file : {} ".format(modified_files.filename))
                    diff = modified_files.diff
# commit abc modified line 1 of file A
# commit def modified line 2 of file A
# commit ghi modified line 3 of file A
# commit lmn deleted lines 1 and 2 of file A

from pydriller import GitRepository

gr = GitRepository('facebook/rocksdb')
commit = gr.get_commit('lmn')
# SZZ: returns the commits that last modified the lines deleted by 'lmn' (here abc and def)
buggy_commits = gr.get_commits_last_modified_lines(commit)
print(buggy_commits)
def miner():
    repo_path = os.path.abspath(working_path + repo_name)

    # Clone if necessary
    if not os.path.exists(repo_path):
        print("Cloning: {}".format(repo_name))
        for c in RepositoryMining(repo_git,
                                  clone_repo_to=os.path.abspath(working_path)).traverse_commits():
            pass
    else:
        print("{} clone done!".format(repo_name))

    # Extract FIX and BIC
    bic_csv = os.path.abspath(working_path + repo_name + "_all.csv")
    header = [
        "hash", "path", "size", "developer", "type", "fix", "bic_path",
        "bic_hash", "bic_size"
    ]
    if not os.path.exists(bic_csv):
        print("Extracting FIX and BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        to_date = datetime(2017, 12, 1, 12, 0, 0)
        gr = GitRepository(repo_path)
        gr2 = GitRepository(repo_path)
        for commit in RepositoryMining(
                repo_path,
                to=to_date,
                only_no_merge=True,
                only_modifications_with_file_types=extensions,
                reversed_order=True).traverse_commits():
            msg = commit.msg.lower()
            mods = commit.modifications
            if len(mods) < 50 and any(word in msg for word in keywords):
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.committer.email,
                    "fix": True
                }
                for mod in mods:
                    dout["type"] = mod.change_type
                    if mod.change_type == ModificationType.DELETE:
                        dout["path"] = mod.old_path
                    else:
                        dout["path"] = mod.new_path
                    bics_per_mod = gr.get_commits_last_modified_lines(commit, mod)
                    for bic_path, bic_commit_hashs in bics_per_mod.items():
                        dout["bic_path"] = bic_path
                        for bic_commit_hash in bic_commit_hashs:
                            bic = gr2.get_commit(bic_commit_hash)
                            dout["bic_hash"] = bic_commit_hash
                            dout["bic_size"] = len(bic.modifications)
                            writer.writerow(dout)
                            out_file.flush()
            else:
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.committer.email,
                    "fix": False,
                    "bic_path": "---",
                    "bic_hash": "---",
                    "bic_size": "---"
                }
                for mod in mods:
                    dout["path"] = mod.new_path
                    writer.writerow(dout)
                    out_file.flush()
        out_file.close()
    else:
        print("Extracting FIX and BIC done!")

    # Get unique BIC
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    unique_devs = set()
    unique_commits = set()
    fixes = {}
    unique_bics = set()
    unique_fics = set()
    for row in reader:
        unique_commits.add(row["hash"])
        if row["path"].endswith(tuple(extensions)):
            unique_devs.add(row["developer"])
            unique_bics.add(row["bic_hash"])
            unique_fics.add(row["bic_path"])
            if row["fix"] == "True":
                fixes[row["hash"]] = True
    unique_bics.remove("---")
    unique_fics.remove("---")
    in_file.close()
    print("Developers: {}, Commits: {} Defective: {}".format(
        len(unique_devs), len(unique_commits), len(fixes)))

    # Save list of BIC
    unique_bic_txt = os.path.abspath(working_path + repo_name + "_unique_bic.txt")
    out_file = open(unique_bic_txt, 'w', newline='', encoding="utf-8")
    for bic in unique_bics:
        out_file.write(bic)
        out_file.write("\n")
    out_file.close()

    # Save list of FIX
    unique_fix_txt = os.path.abspath(working_path + repo_name + "_unique_fix.txt")
    out_file = open(unique_fix_txt, 'w', newline='', encoding="utf-8")
    for fix in fixes:
        out_file.write(fix)
        out_file.write("\n")
    out_file.close()

    # Count fully and partially defective commits, and defective files in defective commits
    bic_csv = os.path.abspath(working_path + repo_name + "_bic_metrics.csv")
    header = ["bic_hash", "bic_size", "bic_path", "defective"]
    if not os.path.exists(bic_csv):
        print("Counting partial BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        gr = GitRepository(repo_path)
        for bic_hash in unique_bics:
            commit = gr.get_commit(bic_hash)
            diff = count_file = len(commit.modifications)
            dout = {
                "bic_hash": bic_hash,
                "bic_size": len(commit.modifications)
            }
            for mod in commit.modifications:
                if mod.filename.endswith(tuple(extensions)) \
                        and mod.change_type is not ModificationType.DELETE:
                    dout["bic_path"] = mod.new_path
                    if mod.new_path in unique_fics:
                        diff -= 1
                        dout["defective"] = True
                    else:
                        dout["defective"] = False
                    writer.writerow(dout)
                    out_file.flush()
                else:
                    count_file -= 1
                    diff -= 1
        out_file.close()
    else:
        print("Counting partial BIC done!")

    # Calculate partially defective commits
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    bics = {}
    fully_defective = partially_defective = 0
    partially_defective_files = total_defective_files = 0
    for row in reader:
        if row["bic_path"].endswith(tuple(extensions)):
            if row["bic_hash"] in bics:
                bics[row["bic_hash"]].append(row["defective"])
            else:
                bics[row["bic_hash"]] = [row["defective"]]
    for key, value in bics.items():
        count_defective_files = value.count("True")
        if len(value) > 1:
            total_defective_files += count_defective_files
            if len(value) == count_defective_files:
                fully_defective += 1
            else:
                partially_defective += 1
                partially_defective_files += len(value) - count_defective_files
    if total_defective_files != 0:
        ratio_defective_files_in_defective_commits = round(
            (partially_defective_files / total_defective_files) * 100, 1)
    else:
        ratio_defective_files_in_defective_commits = 0
    denominator = fully_defective + partially_defective
    if denominator != 0:
        ratio_partially_defective_commits = round(
            (partially_defective / denominator) * 100, 1)
    else:
        ratio_partially_defective_commits = 0
    print(
        "Partially def. commits: {}%. Defective files in partially def. commits: {}%"
        .format(ratio_partially_defective_commits,
                ratio_defective_files_in_defective_commits))
def main():
    repo_path = sys.argv[1]
    repo_branch = 'master'
    base_dir_fixed = "data/fixed/{}".format(
        os.path.basename(os.path.normpath(repo_path)))
    base_dir_bug = "data/bug/{}".format(
        os.path.basename(os.path.normpath(repo_path)))
    if not os.path.exists(base_dir_bug):
        os.makedirs(base_dir_bug)
    if not os.path.exists(base_dir_fixed):
        os.makedirs(base_dir_fixed)
    gitRepo = GitRepository(repo_path)
    commits = RepositoryMining(repo_path, only_in_branch=repo_branch).traverse_commits()
    i = 0
    for commit in commits:
        # print(commit.hash, commit.msg.split('\n')[0])
        try:
            if not valid_commit(commit):
                continue
            i += 1
            # if i == 250: break
            fixed_files = []
            for m in commit.modifications:
                if not valid_modification(m):
                    continue
                bug_commit = gitRepo.get_commits_last_modified_lines(commit, m)  # uses SZZ
                if bug_commit == {}:
                    continue
                fixed_files.append(m.filename)
                fixed_file_name = "{}/{}_{}_{}".format(base_dir_fixed, str(i),
                                                       commit.hash[:6], m.filename)
                for file in bug_commit:
                    if file.split('/')[-1] not in fixed_files:
                        continue
                    latest_bug_commit_date = utc.localize(
                        datetime.strptime("1/1/1950 00:00:00", "%d/%m/%Y %H:%M:%S"))
                    latest_bug_commit_hash = ""
                    for past_commit_hash in bug_commit[file]:
                        past_commit = gitRepo.get_commit(past_commit_hash)
                        past_commit_date = past_commit.committer_date.replace(tzinfo=utc)
                        if past_commit_date > latest_bug_commit_date:
                            latest_bug_commit_date = past_commit.author_date
                            latest_bug_commit_hash = past_commit_hash
                    latest_bug_commit = gitRepo.get_commit(latest_bug_commit_hash)
                    for bug_m in latest_bug_commit.modifications:
                        if bug_m.filename not in fixed_files:
                            continue
                        if bug_m.source_code is None:
                            continue
                        bug_file_name = "{}/{}_{}_{}".format(
                            base_dir_bug, str(i), latest_bug_commit.hash[:6], bug_m.filename)
                        with open(bug_file_name, 'w') as the_file:
                            the_file.write(bug_m.source_code)
                        with open(fixed_file_name, 'w') as the_file:
                            the_file.write(m.source_code)
                        print(i, commit.msg)  # .split('\n')[0]
                        print(fixed_file_name)
                        print(bug_file_name)
                        print("********")
        except Exception as e:
            print("[***]", e)
            # print(traceback.format_exc())
            print("Continuing for next commits")
    print("All done")
from pydriller import RepositoryMining, GitRepository

repo = '/Users/luca/TUProjects/Salerno/jpacman-framework'

fix_commits = []
for commit in RepositoryMining(
        repo, only_modifications_with_file_types=['.java']).traverse_commits():
    if 'fix' in commit.msg:
        fix_commits.append(commit)

gr = GitRepository(repo)
buggy_commit_hashs = set()
for fix_commit in fix_commits:
    # get_commits_last_modified_lines returns a dict mapping each modified file path
    # to the set of commit hashes that last touched the fixed lines
    bug_commits = gr.get_commits_last_modified_lines(fix_commit)
    for hashes in bug_commits.values():
        buggy_commit_hashs.update(hashes)  # add each set of hashes to the overall set

print('Number of commits with fixes: {}'.format(len(fix_commits)))
print('Number of commits with bugs: {}'.format(len(buggy_commit_hashs)))