# ---- Example #1 ----
def run_for_one_project(db_name, project_name, repo_path, thread_id=0):
    """Mine one project's history for bug-fix commits and their inducing commits.

    Walks every commit that touches .java files; a commit whose message
    contains one of the module-level `bugs` keywords is stored as a bug fix,
    each of its modified .java files is stored, and SZZ
    (get_commits_last_modified_lines) locates the bug-inducing commits.

    Args:
        db_name: name of the MySQL database to write into.
        project_name: repository directory name under `repo_path`.
        repo_path: parent directory holding the cloned repositories.
        thread_id: identifier used only in progress log lines.
    """
    db_obj = db.DB()
    db_obj.set_db_name(db_name)
    cursor, conn = db_obj.connect_mysql()
    repo_full_path = '{}/{}'.format(repo_path, project_name)
    gr = GitRepository(repo_full_path)
    total_commits = gr.total_commits()
    count = 0
    try:
        for commit in RepositoryMining(repo_full_path,
                                       only_modifications_with_file_types=['.java']).traverse_commits():
            msg = commit.msg.lower()  # case-insensitive keyword matching
            for key in bugs:
                if key in msg:
                    bugfix = bug.BugFix(db_obj, cursor, conn, commit.hash, project_name, msg, key,
                                        commit.committer_date, commit.author.name, commit.author.email)
                    bugfix.insert_into_database()  # insert bugfix
                    for modified_file in commit.modifications:
                        if modified_file.filename.endswith('.java'):
                            churn = modified_file.added + modified_file.removed
                            bug_fix_file = bug.BugFixFile(db_obj, cursor, conn, commit.hash,
                                                          modified_file.new_path, churn)
                            # SZZ: commits that last touched the lines changed by this fix
                            bug_induce_commits = gr.get_commits_last_modified_lines(commit, modified_file)
                            induce_hashes = bug_induce_commits.get(modified_file.new_path)
                            bugfix.set_induce_commits(induce_hashes)
                            bugfix.insert_into_bug_fix_induce()  # insert bug fix induce
                            bug_fix_file.insert_into_database()  # insert bug fix file
                            # Was a bare try/except used to trap iterating over
                            # None; check explicitly instead.
                            if induce_hashes is None:
                                print("no induce commits found")
                            else:
                                for ind_commit in induce_hashes:
                                    getDetailsOfInduceCommit(db_obj, cursor, conn, project_name, gr, ind_commit)
                    break  # one matching keyword is enough per commit

            count += 1
            if count % 100 == 0:
                print("Thread {}: Done processing: {} {}/{}".format(thread_id, commit.hash,
                                                                    count, total_commits))
    except Exception as e:
        # Was a bare `except:` that also swallowed KeyboardInterrupt; keep the
        # best-effort behavior but report the cause.
        print("Exception occured: {}".format(e))
    finally:
        # Close the connection even when mining fails (the original only
        # closed it on the success path, leaking it on any exception).
        db_obj.close_connection(conn)
# ---- Example #2 ----
                    # NOTE(review): fragment of a larger loop body — `s`,
                    # `solves`, `solved`, `exceptX`, `Commit`, `modified_file`
                    # and `repos` are all defined in the enclosing (unseen)
                    # scope; presumably `s` is a commit message.
                    # Tally fix-related keyword occurrences.
                    if "solves" in s: solves += 1
                    if "solved" in s: solved += 1
                    if "except" in s: exceptX += 1
                    #
                    # print(Commit.msg)
                    # get files code modifications
                    # == displays all code modifications
                    # print(modified_file.diff)
                    # to facilitate the parsing of code modifications, we use the GitRepository Class in line 3
                    # diff =modified_file.diff
                    # print(diff)
                    # parsed_diff= repos.parse_diff(diff)
                    # pprint(parsed_diff)

                    # GET ALL BUGGY COMMITS
                    # SZZ: commits that last touched the lines this commit modified.
                    bug_inducing_commits = repos.get_commits_last_modified_lines(Commit, modified_file)
                    # print(bug_inducing_commits)
                    BuggyCommits = bug_inducing_commits
                    # print(BuggyCommits)

                    # x = re.search("(?<=\')(.*?)(?=\')", str(BuggyCommits))
                    # print(x)
                    # match=re.findall("(?<=\')(.*?)(?=\')", str(BuggyCommits))
                    # NOTE(review): the two regexes below scrape commit hashes out
                    # of the dict's str() representation — fragile; iterating the
                    # dict's values directly would be safer. TODO confirm intent.
                    match = re.findall("(?<=\ ')[a-zA-Z0-9 ]*", str(BuggyCommits))
                    # print(match)
                    match1 = re.findall("(?<=\ {')[a-zA-Z0-9 ]*", str(BuggyCommits))
                    # print(match1)

                    # Total bug-inducing commit count for this modification.
                    NbBugs = len(match) + len(match1)

                    #print("Number of buggy commits before and after introduction of smells= " + str(NbBugs))
# ---- Example #3 ----
def miner():
    """Mine fix commits and their bug-inducing commits (BIC) for one repo.

    Stages (each skipped when its output CSV already exists):
      1. Clone the repository under `working_path` if missing.
      2. Walk the history newest-first up to 2017-06-01; commits whose
         message contains a `keywords` entry count as fixes and SZZ
         (get_commits_last_modified_lines) yields their BICs
         -> <repo>_bic.csv.
      3. Aggregate unique developers / commits / fixes / BICs from that CSV.
      4. Record per-file defectiveness of each unique BIC
         -> <repo>_partial_bic.csv.
      5. Print the ratio of partially defective commits and of defective
         files within them.

    Relies on the module-level globals `working_path`, `repo_name`,
    `repo_git`, `keywords` and `extensions`.
    """
    repo_path = os.path.abspath(working_path + repo_name)
    # Clone if necessary (clone_repo_to makes PyDriller clone the remote).
    if not os.path.exists(repo_path):
        print("Cloning: {}".format(repo_name))
        for c in RepositoryMining(repo_git,
                                  clone_repo_to=os.path.abspath(
                                      working_path)).traverse_commits():
            pass
    else:
        print("{} clone done!".format(repo_name))

    # --- Stage 2: extract FIX and BIC rows --------------------------------
    bic_csv = os.path.abspath(working_path + repo_name + "_bic.csv")
    header = [
        "hash", "path", "size", "developer", "fix", "bic_path", "bic_hash",
        "bic_size"
    ]
    no_fix_count = fix_count = 0
    if not os.path.exists(bic_csv):
        print("Extracting FIX and BIC")
        # `with` closes the CSV even if mining raises (the original leaked
        # the handle on any failure).
        with open(bic_csv, 'w', newline='', encoding="utf-8") as out_file:
            writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
            writer.writeheader()
            to_date = datetime(2017, 6, 1, 12, 0, 0)
            gr = GitRepository(repo_path)
            gr2 = GitRepository(repo_path)  # separate instance for BIC lookups
            for commit in RepositoryMining(repo_path,
                                           to=to_date,
                                           reversed_order=True).traverse_commits():
                msg = commit.msg.lower()
                mods = commit.modifications
                if any(word in msg for word in keywords):
                    # Fix commit: one row per (modified file, BIC hash).
                    dout = {
                        "hash": commit.hash,
                        "size": len(mods),
                        "developer": commit.author.email,
                        "fix": True
                    }
                    fix_count += 1
                    for mod in mods:
                        dout["path"] = mod.new_path
                        bics_per_mod = gr.get_commits_last_modified_lines(
                            commit, mod)  # SZZ
                        for bic_path, bic_commit_hashs in bics_per_mod.items():
                            dout["bic_path"] = bic_path
                            for bic_commit_hash in bic_commit_hashs:
                                bic = gr2.get_commit(bic_commit_hash)
                                dout["bic_hash"] = bic_commit_hash
                                dout["bic_size"] = len(bic.modifications)
                                writer.writerow(dout)
                                out_file.flush()
                    if len(mods) == 0:
                        # Placeholder row so the fix itself is not lost.
                        dout["path"] = dout["bic_path"] = dout["bic_hash"] = dout[
                            "bic_size"] = "---"
                        writer.writerow(dout)
                        out_file.flush()
                else:
                    # Non-fix commit: one row per modified file, no BIC data.
                    no_fix_count += 1
                    dout = {
                        "hash": commit.hash,
                        "size": len(mods),
                        "developer": commit.committer.email,
                        "fix": False,
                        "bic_path": "---",
                        "bic_hash": "---",
                        "bic_size": "---"
                    }
                    for mod in mods:
                        dout["path"] = mod.new_path
                        writer.writerow(dout)
                        out_file.flush()
                    if len(mods) == 0:
                        dout["path"] = "---"
                        writer.writerow(dout)
                        out_file.flush()
    else:
        print("Extracting FIX and BIC done!")

    # --- Stage 3: collect unique BIC --------------------------------------
    unique_devs = set()
    unique_commits = set()
    fixes = set()
    unique_bics = set()
    unique_fics = set()
    with open(bic_csv, 'r', newline='', encoding="utf-8") as in_file:
        reader = csv.DictReader(in_file, delimiter=',')
        for row in reader:
            unique_commits.add(row["hash"])
            if row["path"].endswith(tuple(extensions)):
                unique_devs.add(row["developer"])
                unique_bics.add(row["bic_hash"])
                unique_fics.add(row["bic_path"])
            if row["fix"] == "True":
                fixes.add(row["hash"])
    # discard() instead of remove(): the "---" placeholder may be absent,
    # in which case remove() raised KeyError.
    unique_bics.discard("---")
    unique_fics.discard("---")
    # NOTE: no_fix_count/fix_count stay 0 when the CSV pre-existed (stage 2
    # skipped) — preserved from the original behavior.
    print("Developers: {}, Commits: {} Defective: {} {} {}".format(
        len(unique_devs), len(unique_commits), len(fixes), no_fix_count,
        fix_count))

    # --- Stage 4: per-file defectiveness of each BIC -----------------------
    bic_csv = os.path.abspath(working_path + repo_name + "_partial_bic.csv")
    header = ["bic_hash", "bic_size", "bic_file_size", "bic_path", "defective"]
    if not os.path.exists(bic_csv):
        print("Counting partial BIC")
        with open(bic_csv, 'w', newline='', encoding="utf-8") as out_file:
            writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
            writer.writeheader()
            gr = GitRepository(repo_path)
            for bic_hash in unique_bics:
                commit = gr.get_commit(bic_hash)
                dout = {
                    "bic_hash": bic_hash,
                    "bic_size": len(commit.modifications)
                }
                for mod in commit.modifications:
                    # Only files matching the tracked extensions are classified.
                    if mod.filename.endswith(tuple(extensions)):
                        dout["bic_path"] = mod.new_path
                        dout["bic_file_size"] = mod.nloc
                        # Defective iff this file also appears as a fixed file.
                        dout["defective"] = mod.new_path in unique_fics
                        writer.writerow(dout)
                        out_file.flush()
    else:
        print("Counting partial BIC done!")

    # --- Stage 5: partially defective commit ratios ------------------------
    bics = {}
    with open(bic_csv, 'r', newline='', encoding="utf-8") as in_file:
        reader = csv.DictReader(in_file, delimiter=',')
        for row in reader:
            if row["bic_path"].endswith(tuple(extensions)):
                bics.setdefault(row["bic_hash"], []).append(row["defective"])
    fully_defective = partially_defective = 0
    partially_defective_files = total_defective_files = 0
    for key, value in bics.items():
        count_defective_files = value.count("True")  # CSV values are strings
        if len(value) > 1:
            total_defective_files += len(value)
        if len(value) == count_defective_files or count_defective_files == 1:
            fully_defective += 1
        else:
            partially_defective += 1
            partially_defective_files += count_defective_files
    # Guard both divisions: either denominator can legitimately be zero
    # (e.g. no multi-file BICs); the original raised ZeroDivisionError.
    if total_defective_files != 0:
        ratio_defective_files_in_defective_commits = round(
            (partially_defective_files / total_defective_files) * 100, 1)
    else:
        ratio_defective_files_in_defective_commits = 0
    if (fully_defective + partially_defective) != 0:
        ratio_partially_defective_commits = round(
            (partially_defective / (fully_defective + partially_defective)) * 100,
            1)
    else:
        ratio_partially_defective_commits = 0
    print(
        "Partially def. commits: {}%. Defective files in partially def. commits: {}%"
        .format(ratio_partially_defective_commits,
                ratio_defective_files_in_defective_commits))
# ---- Example #4 ----
                            fieldnames=header)
    writer.writeheader()

    # NOTE(review): fragment of a larger script — `header`, `args`,
    # `input_columns`, `get_bic_count` and `get_method_count` are defined
    # in the enclosing (unseen) scope.
    # Perform Git blame to retrieve the list of BIC commits
    gr = GitRepository(args.repo)
    fixes = csv.DictReader(open(args.csv, 'r', newline='', encoding="utf-8"),
                           delimiter=args.delimiter)
    count = 0
    # NOTE(review): `count` is never incremented, so every log line prints
    # "0)" — confirm whether a `count += 1` is missing.
    for fix in fixes:
        git_hash = fix['git_hash']
        print('{}) Processing {} '.format(count, git_hash))
        fix_commit = gr.get_commit(git_hash)
        for mod in fix_commit.modifications:
            if mod.filename.endswith('.cpp'):
                # hashes_to_ignore_path lets SZZ skip known-irrelevant commits.
                if args.notuse:
                    bic_mods = gr.get_commits_last_modified_lines(
                        fix_commit, mod, hashes_to_ignore_path=args.notuse)
                else:
                    bic_mods = gr.get_commits_last_modified_lines(
                        fix_commit, mod)
                print('   ==> {} has {} MOD, {} BIC'.format(
                    git_hash, len(bic_mods), get_bic_count(bic_mods)))

                # Output-row skeleton; ancillary columns are appended below.
                dout = {
                    'git_timestamp': fix_commit.committer_date,
                    'git_modifications': len(fix_commit.modifications),
                    'git_methods': get_method_count(fix_commit.modifications),
                    'bic_count': len(bic_mods)
                }
                # Append the ancillary data contained in the input file
                for ic in input_columns:
                    dout[ic] = fix[ic]
# ---- Example #5 ----
def findBugCausingCommits(projectMap, local_repos_directory, output_directory):
    """Run SZZ on each project's bug-fix commits and persist statistics about
    the bug-causing (bug-inducing) commits.

    Args:
        projectMap: dict mapping project name -> list of dicts, each carrying
            a "commit_hash" key identifying a known bug-fix commit.
        local_repos_directory: directory containing the cloned repositories.
        output_directory: directory where "<project>_bug_causing_commits"
            result files are written via IOUtils.writeBugMap.
    """
    for project, commits in projectMap.items():

        print("finding bug causing commits for ",
              str(local_repos_directory) + "/" + project)

        # Skip projects whose result file already exists.
        result_path = (str(output_directory) + "/" + str(project) +
                       "_bug_causing_commits")
        if os.path.exists(result_path) and os.path.isfile(result_path):
            print(project, "already analyzed, skipping...")
            continue

        repo_path = str(local_repos_directory) + "/" + project
        repo = GitRepository(repo_path)
        startTime = time.time()
        bugInducingCommits = []
        hashes = [x["commit_hash"] for x in commits]

        try:
            # analyze each bug fix for this project
            for bugFix in RepositoryMining(
                    repo_path, only_commits=hashes).traverse_commits():

                # SZZ: commits that last touched the lines modified by the fix
                commitsLastTouchedFix = repo.get_commits_last_modified_lines(
                    bugFix)

                # Flatten the per-file hash sets into one de-duplicated list.
                bugCausingHashes = set()
                for filename, fileCommit in commitsLastTouchedFix.items():
                    bugCausingHashes.update(fileCommit)
                hashList = list(bugCausingHashes)

                # Gather aggregate statistics for each bug-causing commit:
                # files modified, lines added/removed, methods changed, nloc,
                # complexity, author, branches, and elapsed time to the fix.
                for bugCausingCommit in RepositoryMining(
                        repo_path, only_commits=hashList).traverse_commits():

                    numModifiedFiles = len(bugCausingCommit.modifications)
                    if numModifiedFiles <= 0:
                        continue

                    linesAdded = 0
                    linesRemoved = 0
                    numMethodsChanged = 0
                    sum_nloc = 0
                    numFilesWithComplexity = 0
                    sumComplexity = 0

                    for modification in bugCausingCommit.modifications:
                        sourceCodeLanguage = LanguageDetector.detect(
                            modification.filename)
                        # Skip files with no detected language or no size
                        # info. (The original wrapped this in a bare
                        # try/except: pass; the comparisons cannot raise, so
                        # the guard is now explicit.)
                        if (sourceCodeLanguage is None
                                or modification.nloc == 0
                                or modification.nloc is None):
                            continue
                        sum_nloc += modification.nloc
                        linesAdded += modification.added
                        linesRemoved += modification.removed
                        numMethodsChanged += len(modification.changed_methods)
                        if modification.complexity:
                            numFilesWithComplexity += 1
                            sumComplexity += modification.complexity

                    # Average over files that reported a complexity; 0 if none.
                    averageComplexityFixedFiles = 0
                    if numFilesWithComplexity != 0:
                        averageComplexityFixedFiles = (
                            sumComplexity / numFilesWithComplexity)

                    bugInducingCommits.append({
                        "commit_hash": bugCausingCommit.hash,
                        "author": bugCausingCommit.author.name,
                        "total_complexity": sumComplexity,
                        "average_complexity": averageComplexityFixedFiles,
                        "sum_nloc": sum_nloc,
                        "num_files": numModifiedFiles,
                        "lines_added": linesAdded,
                        "lines_removed": linesRemoved,
                        "commit_date": bugCausingCommit.author_date,
                        "branches": bugCausingCommit.branches,
                        "num_methods_changed": numMethodsChanged,
                        "time_to_fix":
                            bugFix.author_date - bugCausingCommit.author_date,
                    })

            IOUtils.writeBugMap({project: bugInducingCommits},
                                output_directory, "_bug_causing_commits")

            endTime = time.time()
            print("time", endTime - startTime)

        except Exception as e:
            # Was a bare `except:` that hid the failure cause entirely.
            print("FAILED FOR", project, e)
# ---- Example #6 ----
def main():
    """Index buggy files via SZZ, then copy each major release's source files
    into bug / not_bug folders under data3/<repo>/<tag>/.

    Expects the repository path as argv[1]; relies on module-level helpers
    `is_bugfix_commit` and `valid_source_file`, and on a
    `maj_versions/<repo>.hash` file listing "tag,hash" per line.
    """
    repo_path = sys.argv[1]
    repo_branch = 'master'

    commits = RepositoryMining(repo_path,
                               only_in_branch=repo_branch).traverse_commits()
    # Materialize the generator: the list is indexed and sliced below.
    commits = [commit for commit in commits]

    gitRepo = GitRepository(repo_path)

    # Only releases well inside the history window (3-year margins) are used.
    start_date = commits[0].committer_date + relativedelta(years=3)
    last_date = commits[-1].committer_date - relativedelta(years=3)

    # commit hash -> list of filenames considered buggy at that commit.
    bug_tracker = defaultdict(list)
    bug_tracker_pickle = "data3/{}.pickle".format(
        os.path.basename(os.path.normpath(repo_path)))

    # First index the buggy files
    if os.path.exists(bug_tracker_pickle):
        # Reuse a previously computed index.
        with open(bug_tracker_pickle, 'rb') as handle:
            bug_tracker = pickle.load(handle)
    else:
        for commit_index, commit in enumerate(commits):
            if not is_bugfix_commit(commit.msg):
                continue

            try:
                for m in commit.modifications:
                    if not valid_source_file(m.filename):
                        continue

                    bug_commit = gitRepo.get_commits_last_modified_lines(
                        commit, m)  ### uses SZZ
                    # if bug_commit == {}: continue

                    # Earliest commit (by list index) that introduced any of
                    # the buggy lines; the sentinel stays if none matches.
                    bug_start_index = 99999999999999999999
                    for _file in bug_commit:
                        for i, _commit in enumerate(commits[:commit_index]):
                            if _commit.hash in bug_commit[_file] \
                                and i<bug_start_index:
                                bug_start_index = i

                    # Mark the file buggy for every commit between the bug's
                    # introduction and its fix.
                    for _commit in commits[bug_start_index:commit_index]:
                        bug_tracker[_commit.hash].append(m.filename)
            except Exception as e:
                print("[***]", e)
                print(traceback.format_exc())
                print("Continuing for next commits")

            print(len(bug_tracker.keys()))
        with open(bug_tracker_pickle, 'wb') as handle:
            pickle.dump(bug_tracker, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # Copy the files
    # Load the (tag, hash) pairs of the major releases.
    with open('maj_versions/{}.hash'.format(
            os.path.basename(os.path.normpath(repo_path)))) as f:
        major_releases = []
        for line in f.read().splitlines():
            tag, hash = line.split(',')
            major_releases.append((tag, hash))

    for version, commit in enumerate(commits):
        if not commit.hash in [item[1] for item in major_releases]:
            continue

        if commit.committer_date < start_date or commit.committer_date > last_date:
            continue

        # Recover the tag that belongs to this release commit.
        for tag, hash in major_releases:
            if hash == commit.hash:
                break

        print("[*] Doing {}".format(tag))
        gitRepo.checkout(commit.hash)

        base_dir_not_bug = "data3/{}/{}/not_bug".format(
            os.path.basename(os.path.normpath(repo_path)), tag)
        base_dir_bug = "data3/{}/{}/bug".format(
            os.path.basename(os.path.normpath(repo_path)), tag)
        if not os.path.exists(base_dir_bug):
            os.makedirs(base_dir_bug)
        if not os.path.exists(base_dir_not_bug):
            os.makedirs(base_dir_not_bug)

        all_files = gitRepo.files()

        # Sort every source file of this release into bug / not_bug.
        for _file in all_files:
            if not valid_source_file(_file):
                continue

            filename = os.path.basename(os.path.normpath(_file))
            if commit.hash in bug_tracker and filename in bug_tracker[
                    commit.hash]:
                file_path_to_write = os.path.join(base_dir_bug, filename)
            else:
                file_path_to_write = os.path.join(base_dir_not_bug, filename)

            shutil.copyfile(_file, file_path_to_write)

    print("All Done!")
    # NOTE(review): everything below appears to be a different script pasted
    # into this function: `project`, `gr`, `bugs`, `data_writer`,
    # `data_writer2`, `data_file`, `data_file2`, `total_added`,
    # `total_deleted`, `added_buggy`, `deleted_buggy`, `total_buggy_commits`
    # and `total_inducing_commits` are never defined in this scope and would
    # raise NameError at runtime — confirm and split into its own function.
    index = 0
    for commit in RepositoryMining(project).traverse_commits():
        print("{} : {}".format(index, commit.hash))
        index += 1
        flag = 0
        bug_msg = ""
        added = 0
        deleted = 0
        for modified in commit.modifications:
            added += modified.added
            deleted += modified.removed
        total_added += added
        total_deleted += deleted
        for bug in bugs:
            if bug in commit.msg.lower():
                flag = 1
                bug_msg = "{}{} , ".format(bug_msg, bug)
                added_buggy += added
                deleted_buggy += deleted
        if flag == 1:
            total_buggy_commits += 1
            # SZZ over the whole commit (no specific modification given).
            buggy_commits = gr.get_commits_last_modified_lines(commit)
            inducing_commits = 0
            inducing_commits += len(buggy_commits)
            data_writer.writerow(['', commit.hash, commit.author.name, commit.author.email, commit.author_date, bug_msg, commit.msg, "", '{}'.format(inducing_commits), '{}'.format(buggy_commits)])
            print("{} : {} - {}".format(total_buggy_commits, commit.hash,buggy_commits))
            total_inducing_commits += inducing_commits
    data_writer2.writerow([project,'{}'.format(index),'{}'.format(total_buggy_commits),'{}'.format(total_inducing_commits), '{}'.format(total_added),'{}'.format(total_deleted),'{}'.format(added_buggy), '{}'.format(deleted_buggy)])
    data_file.close()
    data_file2.close()
# ---- Example #8 ----
# NOTE(review): script fragment — `bug_dict` and `gr` (a GitRepository) must
# be defined earlier in the full script.
vals = np.array(list(bug_dict.values()))
# For every candidate commit list: print each commit's diff information and
# its SZZ bug-inducing commits.
for i in vals:
    if len(i) == 1:
        # Single candidate hash for this bug entry.
        for candidate_commit in RepositoryMining("~/openstack",
                                                 single=i).traverse_commits():
            print("cand sha is: ", candidate_commit.hash)
            print("cand msg is: ", candidate_commit.msg)

            for modified_files in candidate_commit.modifications:
                # BUG FIX: was `modification_file.filename` — an undefined
                # name (NameError); the loop variable is `modified_files`.
                print("Modified this file : ", modified_files.filename)
                diff = modified_files.diff
                parsed_diff = gr.parse_diff(diff)
                print("This is the usual diff:  {}".format(diff))
                buggy_induced_commits = gr.get_commits_last_modified_lines(
                    candidate_commit, modified_files)
                print("This is a bug inducing commit :  ",
                      buggy_induced_commits)
                pprint("Parsed diff {} :".format(parsed_diff))
                # "This is this diff of the file :  {}".format(parsed_diff)
    else:
        # Several candidate hashes: mine them one at a time.
        for x in i:
            for cand_commit in RepositoryMining("~/openstack/",
                                                single=x).traverse_commits():
                print("cand sha is: {}".format(cand_commit.hash))
                print("cand msg is: {}".format(cand_commit.msg))

                for modified_files in cand_commit.modifications:
                    print("Modified this file : {} ".format(
                        modified_files.filename))
                    diff = modified_files.diff
# ---- Example #9 ----
# SZZ demo: given the history below, the commits blamed for the lines that
# 'lmn' deleted should be abc and def.
# commit abc modified line 1 of file A
# commit def modified line 2 of file A
# commit ghi modified line 3 of file A
# commit lmn deleted lines 1 and 2 of file A
from pydriller import GitRepository

gr = GitRepository('facebook/rocksdb')
# BUG FIX: the original called
# gr.get_commits_last_modified_lines().getcommit('lmn'), which is not part of
# the PyDriller API (get_commits_last_modified_lines requires a commit and
# has no getcommit method). Fetch the commit with get_commit() first.
commit = gr.get_commit('lmn')
buggy_commits = gr.get_commits_last_modified_lines(commit)
print(buggy_commits)
def miner():
    """Mine fix commits and their bug-inducing commits (BIC) for one repo.

    Variant of the BIC pipeline that skips merge commits, ignores very large
    fix commits (>= 50 modified files), records each file's modification
    type, and also dumps the unique BIC / FIX hash lists to text files.
    Each CSV-producing stage is skipped when its output already exists.

    Relies on the module-level globals `working_path`, `repo_name`,
    `repo_git`, `keywords` and `extensions`.
    """
    repo_path = os.path.abspath(working_path + repo_name)
    # Clone if necessary (clone_repo_to makes PyDriller clone the remote).
    if not os.path.exists(repo_path):
        print("Cloning: {}".format(repo_name))
        for c in RepositoryMining(repo_git,
                                  clone_repo_to=os.path.abspath(
                                      working_path)).traverse_commits():
            pass
    else:
        print("{} clone done!".format(repo_name))

    # --- Extract FIX and BIC rows -----------------------------------------
    bic_csv = os.path.abspath(working_path + repo_name + "_all.csv")
    header = [
        "hash", "path", "size", "developer", "type", "fix", "bic_path",
        "bic_hash", "bic_size"
    ]
    if not os.path.exists(bic_csv):
        print("Extracting FIX and BIC")
        # `with` closes the CSV even if mining raises (the original leaked
        # the handle on any failure).
        with open(bic_csv, 'w', newline='', encoding="utf-8") as out_file:
            writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
            writer.writeheader()
            to_date = datetime(2017, 12, 1, 12, 0, 0)
            gr = GitRepository(repo_path)
            gr2 = GitRepository(repo_path)  # separate instance for BIC lookups
            for commit in RepositoryMining(
                    repo_path,
                    to=to_date,
                    only_no_merge=True,
                    only_modifications_with_file_types=extensions,
                    reversed_order=True).traverse_commits():
                msg = commit.msg.lower()
                mods = commit.modifications
                # Treat as a fix only small commits with a fix keyword.
                if len(mods) < 50 and any(word in msg for word in keywords):
                    dout = {
                        "hash": commit.hash,
                        "size": len(mods),
                        "developer": commit.committer.email,
                        "fix": True
                    }
                    for mod in mods:
                        dout["type"] = mod.change_type
                        # Deleted files only have an old_path.
                        if mod.change_type == ModificationType.DELETE:
                            dout["path"] = mod.old_path
                        else:
                            dout["path"] = mod.new_path
                        bics_per_mod = gr.get_commits_last_modified_lines(
                            commit, mod)  # SZZ
                        for bic_path, bic_commit_hashs in bics_per_mod.items():
                            dout["bic_path"] = bic_path
                            for bic_commit_hash in bic_commit_hashs:
                                bic = gr2.get_commit(bic_commit_hash)
                                dout["bic_hash"] = bic_commit_hash
                                dout["bic_size"] = len(bic.modifications)
                                writer.writerow(dout)
                                out_file.flush()
                else:
                    # Non-fix commit: one row per modified file, no BIC data.
                    dout = {
                        "hash": commit.hash,
                        "size": len(mods),
                        "developer": commit.committer.email,
                        "fix": False,
                        "bic_path": "---",
                        "bic_hash": "---",
                        "bic_size": "---"
                    }
                    for mod in mods:
                        dout["path"] = mod.new_path
                        writer.writerow(dout)
                        out_file.flush()
    else:
        print("Extracting FIX and BIC done!")

    # --- Collect unique BIC -------------------------------------------------
    unique_devs = set()
    unique_commits = set()
    fixes = {}
    unique_bics = set()
    unique_fics = set()
    with open(bic_csv, 'r', newline='', encoding="utf-8") as in_file:
        reader = csv.DictReader(in_file, delimiter=',')
        for row in reader:
            unique_commits.add(row["hash"])
            if row["path"].endswith(tuple(extensions)):
                unique_devs.add(row["developer"])
                unique_bics.add(row["bic_hash"])
                unique_fics.add(row["bic_path"])
                if row["fix"] == "True":
                    fixes[row["hash"]] = True
    # discard() instead of remove(): the "---" placeholder may be absent,
    # in which case remove() raised KeyError.
    unique_bics.discard("---")
    unique_fics.discard("---")
    print("Developers: {}, Commits: {} Defective: {}".format(
        len(unique_devs), len(unique_commits), len(fixes)))

    # --- Save list of BIC ---------------------------------------------------
    unique_bic_txt = os.path.abspath(working_path + repo_name +
                                     "_unique_bic.txt")
    with open(unique_bic_txt, 'w', newline='', encoding="utf-8") as out_file:
        for bic in unique_bics:
            out_file.write(bic)
            out_file.write("\n")

    # --- Save list of FIX ---------------------------------------------------
    unique_fix_txt = os.path.abspath(working_path + repo_name +
                                     "_unique_fix.txt")
    with open(unique_fix_txt, 'w', newline='', encoding="utf-8") as out_file:
        for fix in fixes:
            out_file.write(fix)
            out_file.write("\n")

    # --- Per-file defectiveness of each BIC ---------------------------------
    bic_csv = os.path.abspath(working_path + repo_name + "_bic_metrics.csv")
    header = ["bic_hash", "bic_size", "bic_path", "defective"]
    if not os.path.exists(bic_csv):
        print("Counting partial BIC")
        with open(bic_csv, 'w', newline='', encoding="utf-8") as out_file:
            writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
            writer.writeheader()
            gr = GitRepository(repo_path)
            for bic_hash in unique_bics:
                commit = gr.get_commit(bic_hash)
                dout = {
                    "bic_hash": bic_hash,
                    "bic_size": len(commit.modifications)
                }
                for mod in commit.modifications:
                    # Classify tracked source files that still exist.
                    if mod.filename.endswith(
                            tuple(extensions)
                    ) and mod.change_type is not ModificationType.DELETE:
                        dout["bic_path"] = mod.new_path
                        # Defective iff this file also appears as a fixed file.
                        dout["defective"] = mod.new_path in unique_fics
                        writer.writerow(dout)
                        out_file.flush()
    else:
        print("Counting partial BIC done!")

    # --- Partially defective commit ratios ----------------------------------
    bics = {}
    with open(bic_csv, 'r', newline='', encoding="utf-8") as in_file:
        reader = csv.DictReader(in_file, delimiter=',')
        for row in reader:
            if row["bic_path"].endswith(tuple(extensions)):
                bics.setdefault(row["bic_hash"], []).append(row["defective"])
    fully_defective = partially_defective = 0
    partially_defective_files = total_defective_files = 0
    for key, value in bics.items():
        count_defective_files = value.count("True")  # CSV values are strings
        if len(value) > 1:
            total_defective_files += count_defective_files
        if len(value) == count_defective_files:
            fully_defective += 1
        else:
            partially_defective += 1
            partially_defective_files += len(value) - count_defective_files
    # Both denominators can legitimately be zero; report 0% in that case.
    if total_defective_files != 0:
        ratio_defective_files_in_defective_commits = round(
            (partially_defective_files / total_defective_files) * 100, 1)
    else:
        ratio_defective_files_in_defective_commits = 0
    denominator = fully_defective + partially_defective
    if denominator != 0:
        ratio_partially_defective_commits = round(
            (partially_defective / denominator) * 100, 1)
    else:
        ratio_partially_defective_commits = 0
    print(
        "Partially def. commits: {}%. Defective files in partially def. commits: {}%"
        .format(ratio_partially_defective_commits,
                ratio_defective_files_in_defective_commits))
# ---- Example #11 ----
# File: bug_and_fix.py  Project: rafed/bugpy
def main():
    """Mine a git repository for bug-fix commits and dump buggy/fixed file pairs.

    Usage: ``script <repo_path>``. Walks the ``master`` branch; for each
    commit accepted by ``valid_commit``, each modification accepted by
    ``valid_modification`` is traced back with SZZ
    (``get_commits_last_modified_lines``) to the commits that last touched
    the fixed lines. The most recent of those candidates is taken as the
    bug-introducing commit, and the buggy and fixed versions of the file
    are written under ``data/bug/<repo>`` and ``data/fixed/<repo>``.
    """
    repo_path = sys.argv[1]
    repo_branch = 'master'
    base_dir_fixed = "data/fixed/{}".format(
        os.path.basename(os.path.normpath(repo_path)))
    base_dir_bug = "data/bug/{}".format(
        os.path.basename(os.path.normpath(repo_path)))

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(base_dir_bug, exist_ok=True)
    os.makedirs(base_dir_fixed, exist_ok=True)

    gitRepo = GitRepository(repo_path)
    commits = RepositoryMining(repo_path,
                               only_in_branch=repo_branch).traverse_commits()

    i = 0  # running index of accepted fix commits, used in output file names

    for commit in commits:
        try:
            if not valid_commit(commit):
                continue
            i += 1

            fixed_files = []

            for m in commit.modifications:
                if not valid_modification(m):
                    continue

                # SZZ: map the lines this fix changed back to the commits
                # that last modified them ({file path: {commit hashes}}).
                bug_commit = gitRepo.get_commits_last_modified_lines(
                    commit, m)

                if not bug_commit:
                    continue

                fixed_files.append(m.filename)
                fixed_file_name = "{}/{}_{}_{}".format(base_dir_fixed, str(i),
                                                       commit.hash[:6],
                                                       m.filename)

                for file in bug_commit:
                    if file.split('/')[-1] not in fixed_files:
                        continue

                    # Sentinel far in the past; tz-aware so comparisons with
                    # aware commit dates are legal.
                    latest_bug_commit_date = utc.localize(
                        datetime.strptime("1/1/1950 00:00:00",
                                          "%d/%m/%Y %H:%M:%S"))
                    latest_bug_commit_hash = ""

                    # Pick the most recent candidate bug-introducing commit.
                    for past_commit_hash in bug_commit[file]:
                        past_commit = gitRepo.get_commit(past_commit_hash)
                        past_commit_date = past_commit.committer_date.replace(
                            tzinfo=utc)
                        if past_commit_date > latest_bug_commit_date:
                            # Bug fix: the original stored author_date here —
                            # a different (possibly naive) timestamp than the
                            # one just compared, which corrupted later
                            # comparisons. Store the aware committer date we
                            # actually compared against.
                            latest_bug_commit_date = past_commit_date
                            latest_bug_commit_hash = past_commit_hash

                    latest_bug_commit = gitRepo.get_commit(
                        latest_bug_commit_hash)

                    for bug_m in latest_bug_commit.modifications:
                        if bug_m.filename not in fixed_files:
                            continue

                        # Deleted files carry no source; nothing to dump.
                        if bug_m.source_code is None:
                            continue

                        bug_file_name = "{}/{}_{}_{}".format(
                            base_dir_bug, str(i), latest_bug_commit.hash[:6],
                            bug_m.filename)
                        with open(bug_file_name, 'w') as the_file:
                            the_file.write(bug_m.source_code)

                        with open(fixed_file_name, 'w') as the_file:
                            the_file.write(m.source_code)

                        print(i, commit.msg)  #.split('\n')[0])
                        print(fixed_file_name)
                        print(bug_file_name)
                        print("********")

        except Exception as e:
            # Best-effort mining: log and keep going so one malformed commit
            # does not abort the whole run.
            print("[***]", e)
            print("Continuing for next commits")

    print("All done")
Example #12
0
from pydriller import RepositoryMining, GitRepository

repo = '/Users/luca/TUProjects/Salerno/jpacman-framework'

# Collect every commit that touches Java files and mentions a fix in its
# message; these are the bug-fixing commits SZZ starts from.
fix_commits = [
    commit
    for commit in RepositoryMining(
        repo, only_modifications_with_file_types=['.java']).traverse_commits()
    if 'fix' in commit.msg
]

gr = GitRepository(repo)

# Run SZZ on each fix commit and accumulate everything it reports into a
# single set (update() dedupes across fixes).
buggy_commit_hashs = set()
for fix_commit in fix_commits:
    bug_commits = gr.get_commits_last_modified_lines(fix_commit)
    buggy_commit_hashs.update(bug_commits)

print('Number of commits with fixes: {}'.format(len(fix_commits)))
print('Number of commits with bugs: {}'.format(len(buggy_commit_hashs)))