Beispiel #1
0
    def get_NCOMM_NADEV_NNDEV_NSCTR(self, index: int, commits: List[MetricBean], beans: MyMetricBeans, my_commits: MyCommits) -> List[int]:
        """Compute the NCOMM, NADEV, NNDEV and NSCTR metrics for the file
        touched by the commit at ``commits[index]``.

        Args:
            index: position of the commit under analysis in ``commits``.
            commits: ordered list of commit metric beans.
            beans: per-path metric beans (currently unused; kept for interface
                stability).
            my_commits: hash-to-paths lookup (currently unused; kept for
                interface stability).

        Returns:
            ``[NCOMM, NADEV, NNDEV, NSCTR]``: number of commits that modified
            the file, number of developer touches (with repetition), number of
            distinct developers, and number of distinct packages touched by
            those commits.
        """
        devs = []  # one entry per touching commit; repetitions are meaningful
        packages = set()

        gr = GitRepository(self.repo_path)
        commit = gr.get_commit(commits[index].git_hash)
        commit_count = 1  # count the commit under analysis itself
        for mod in commit.modifications:
            # NOTE(review): matching on new_path only — renamed/deleted files
            # may be missed. The original author flagged this line with
            # "This must be changed"; confirm intended rename handling.
            if mod.new_path is not None and mod.new_path == commits[index].new_path:
                for cmf in gr.get_commits_modified_file(mod.new_path):
                    commit_count += 1
                    c = gr.get_commit(cmf)
                    devs.append(c.author.email)
                    for m in c.modifications:
                        path = m.new_path if m.new_path is not None else m.old_path
                        if path.endswith(m.filename):
                            # Package = directory portion (strip "/<filename>")
                            packages.add(path[:-(len(m.filename) + 1)])
        return [commit_count, len(devs), len(set(devs)), len(packages)]
Beispiel #2
0
def ublame_cli(filename, patterns):
    """Walk every commit that modified *filename* and run ``diff_commit``
    with *patterns* against each one.

    Args:
        filename: path (absolute or relative) of the file to trace.
        patterns: patterns forwarded unchanged to ``diff_commit``.
    """
    filename = os.path.abspath(filename)
    repo_path = repo_path_for(filename)
    # FIX: os.path.relpath is more robust than splitting on the repo path
    # string, which breaks if that string recurs inside the file's path.
    relative_filename = os.path.relpath(filename, repo_path)
    repo = GitRepository(repo_path)

    for commit_hash in repo.get_commits_modified_file(relative_filename):
        commit = repo.get_commit(commit_hash)
        diff_commit(commit, patterns)
Beispiel #3
0
 def get_SCTR(self, index: int, commits: List[MetricBean], beans: MyMetricBeans, my_commits: MyCommits) -> int:
     """Compute SCTR: the number of distinct packages modified by the commit
     at ``commits[index]``, counted only when the recorded committer e-mail
     matches the repository commit's committer.

     Args:
         index: position of the commit under analysis in ``commits``.
         commits: ordered list of commit metric beans.
         beans: per-path metric beans (currently unused; kept for interface
             stability).
         my_commits: hash-to-paths lookup (currently unused; kept for
             interface stability).

     Returns:
         Number of distinct packages touched by the commit.
     """
     packages = set()
     gr = GitRepository(self.repo_path)
     commit = gr.get_commit(commits[index].git_hash)
     # Only attribute packages when the metric bean's committer matches the
     # actual commit committer.
     if commits[index].committer_email == commit.committer.email:
         for mod in commit.modifications:
             path = mod.new_path if mod.new_path is not None else mod.old_path
             if path.endswith(mod.filename):
                 # Package = directory portion (strip "/<filename>")
                 packages.add(path[:-(len(mod.filename) + 1)])
     return len(packages)
Beispiel #4
0
def test_diff_no_newline():
    """
    If a file ends without a newline git represents this with the additional line
        \\ No newline at end of file
    in diffs. This test asserts these additional lines are parsed correctly.
    """
    repo = GitRepository('test-repos/no_newline')

    commit = repo.get_commit('52a78c1ee5d100528eccba0a3d67371dbd22d898')
    parsed = repo.parse_diff(commit.modifications[0].diff)

    # 'test1' is reported as deleted (its version without the trailing
    # newline) and re-added (now with the newline appended).
    assert (1, 'test1') in parsed['deleted']
    assert (1, 'test1') in parsed['added']
    assert (2, 'test2') in parsed['added']

    repo.clear()
Beispiel #5
0
def miner():
    """Mine *repo_name*: extract fix commits and their bug-inducing commits
    (BIC) via SZZ, persist them to CSV files, and print summary statistics
    about fully/partially defective commits.

    Relies on module-level configuration: ``working_path``, ``repo_name``,
    ``repo_git``, ``keywords`` and ``extensions``.
    """
    repo_path = os.path.abspath(working_path + repo_name)
    # Clone if necessary
    if not os.path.exists(repo_path):
        print("Cloning: {}".format(repo_name))
        for c in RepositoryMining(repo_git,
                                  clone_repo_to=os.path.abspath(
                                      working_path)).traverse_commits():
            pass
    else:
        print("{} clone done!".format(repo_name))

    # Extract FIX and BIC
    bic_csv = os.path.abspath(working_path + repo_name + "_bic.csv")
    header = [
        "hash", "path", "size", "developer", "fix", "bic_path", "bic_hash",
        "bic_size"
    ]
    # NOTE: these counters stay 0 when the CSV already exists (the extraction
    # step is skipped), so the final summary reflects only fresh runs.
    no_fix_count = fix_count = 0
    if not os.path.exists(bic_csv):
        print("Extracting FIX and BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        to_date = datetime(2017, 6, 1, 12, 0, 0)
        gr = GitRepository(repo_path)
        gr2 = GitRepository(repo_path)  # separate handle for BIC lookups
        for commit in RepositoryMining(repo_path,
                                       to=to_date,
                                       reversed_order=True).traverse_commits():
            msg = commit.msg.lower()
            mods = commit.modifications
            if any(word in msg for word in keywords):
                # Fix commit: run SZZ per modification to find its BICs.
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.author.email,
                    "fix": True
                }
                fix_count += 1
                for mod in mods:
                    dout["path"] = mod.new_path
                    bics_per_mod = gr.get_commits_last_modified_lines(
                        commit, mod)
                    for bic_path, bic_commit_hashs in bics_per_mod.items():
                        dout["bic_path"] = bic_path
                        for bic_commit_hash in bic_commit_hashs:
                            bic = gr2.get_commit(bic_commit_hash)
                            dout["bic_hash"] = bic_commit_hash
                            dout["bic_size"] = len(bic.modifications)
                            writer.writerow(dout)
                            out_file.flush()
                if not mods:
                    # Keep a placeholder row so the fix commit is not lost.
                    dout["path"] = dout["bic_path"] = dout["bic_hash"] = dout[
                        "bic_size"] = "---"
                    writer.writerow(dout)
                    out_file.flush()
            else:
                no_fix_count += 1
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.committer.email,
                    "fix": False,
                    "bic_path": "---",
                    "bic_hash": "---",
                    "bic_size": "---"
                }
                for mod in mods:
                    dout["path"] = mod.new_path
                    writer.writerow(dout)
                    out_file.flush()
                if not mods:
                    dout["path"] = "---"
                    writer.writerow(dout)
                    out_file.flush()
        out_file.close()
    else:
        print("Extracting FIX and BIC done!")

    # Get unique BIC
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    unique_devs = set()
    unique_commits = set()
    fixes = set()
    unique_bics = set()
    unique_fics = set()
    for row in reader:
        unique_commits.add(row["hash"])
        if row["path"].endswith(tuple(extensions)):
            unique_devs.add(row["developer"])
            unique_bics.add(row["bic_hash"])
            unique_fics.add(row["bic_path"])
        if row["fix"] == "True":
            fixes.add(row["hash"])
    # FIX: discard() instead of remove() — remove() raises KeyError when no
    # "---" placeholder row made it into the sets.
    unique_bics.discard("---")
    unique_fics.discard("---")
    in_file.close()
    print("Developers: {}, Commits: {} Defective: {} {} {}".format(
        len(unique_devs), len(unique_commits), len(fixes), no_fix_count,
        fix_count))

    # Count fully and partially defective commits, and defective files in defective commits
    bic_csv = os.path.abspath(working_path + repo_name + "_partial_bic.csv")
    header = ["bic_hash", "bic_size", "bic_file_size", "bic_path", "defective"]
    if not os.path.exists(bic_csv):
        print("Counting partial BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        gr = GitRepository(repo_path)
        for bic_hash in unique_bics:
            commit = gr.get_commit(bic_hash)
            diff = count_file = len(commit.modifications)
            dout = {
                "bic_hash": bic_hash,
                "bic_size": len(commit.modifications)
            }
            for mod in commit.modifications:
                if mod.filename.endswith(tuple(extensions)):
                    dout["bic_path"] = mod.new_path
                    dout["bic_file_size"] = mod.nloc
                    if mod.new_path in unique_fics:
                        diff -= 1
                        dout["defective"] = True
                    else:
                        dout["defective"] = False
                    writer.writerow(dout)
                    out_file.flush()
                else:
                    count_file -= 1
                    diff -= 1
        out_file.close()
    else:
        print("Counting partial BIC done!")

    # Calculate partially defective commits
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    bics = {}
    fully_defective = partially_defective = 0
    partially_defective_files = total_defective_files = 0
    for row in reader:
        if row["bic_path"].endswith(tuple(extensions)):
            if row["bic_hash"] in bics:
                bics[row["bic_hash"]].append(row["defective"])
            else:
                bics[row["bic_hash"]] = [row["defective"]]
    for key, value in bics.items():
        # CSV values are strings, hence counting the literal "True".
        count_defective_files = value.count("True")
        if len(value) > 1:
            total_defective_files += len(value)
        # NOTE(review): a single defective file also counts as "fully
        # defective" here — confirm this is the intended definition.
        if len(value) == count_defective_files or count_defective_files == 1:
            fully_defective += 1
        else:
            partially_defective += 1
            partially_defective_files += count_defective_files
    # FIX: guard both ratios against empty inputs (ZeroDivisionError),
    # matching the safer variant of this script.
    if total_defective_files != 0:
        ratio_defective_files_in_defective_commits = round(
            (partially_defective_files / total_defective_files) * 100, 1)
    else:
        ratio_defective_files_in_defective_commits = 0
    denominator = fully_defective + partially_defective
    if denominator != 0:
        ratio_partially_defective_commits = round(
            (partially_defective / denominator) * 100, 1)
    else:
        ratio_partially_defective_commits = 0
    print(
        "Partially def. commits: {}%. Defective files in partially def. commits: {}%"
        .format(ratio_partially_defective_commits,
                ratio_defective_files_in_defective_commits))
Beispiel #6
0
    ]
    out_file = open(args.output, 'w', newline='', encoding="utf-8")
    writer = csv.DictWriter(out_file,
                            delimiter=args.delimiter,
                            fieldnames=header)
    writer.writeheader()

    # Perform Git blame to retrieve the list of BIC commits
    gr = GitRepository(args.repo)
    fixes = csv.DictReader(open(args.csv, 'r', newline='', encoding="utf-8"),
                           delimiter=args.delimiter)
    count = 0
    for fix in fixes:
        git_hash = fix['git_hash']
        print('{}) Processing {} '.format(count, git_hash))
        fix_commit = gr.get_commit(git_hash)
        for mod in fix_commit.modifications:
            if mod.filename.endswith('.cpp'):
                if args.notuse:
                    bic_mods = gr.get_commits_last_modified_lines(
                        fix_commit, mod, hashes_to_ignore_path=args.notuse)
                else:
                    bic_mods = gr.get_commits_last_modified_lines(
                        fix_commit, mod)
                print('   ==> {} has {} MOD, {} BIC'.format(
                    git_hash, len(bic_mods), get_bic_count(bic_mods)))

                dout = {
                    'git_timestamp': fix_commit.committer_date,
                    'git_modifications': len(fix_commit.modifications),
                    'git_methods': get_method_count(fix_commit.modifications),
def modification(commit):
    """Yield the first modification of *commit* from the diff test repo,
    then release the repository handle."""
    repo = GitRepository("test-repos/diff")
    first_mod = repo.get_commit(commit).modifications[0]
    yield first_mod
    repo.clear()
    def mine_methods(self,
                     start_commit: str,
                     stop_commit: str,
                     filter_methods: Set[str] = None,
                     filter_files: Set[str] = None) -> int:
        """Mine method-level metrics for every commit between *start_commit*
        and *stop_commit*, walking the history backwards, and flush them to
        the CSV file configured on this instance.

        Args:
            start_commit: hash to start from; defaults to HEAD when None.
            stop_commit: hash to stop at.
            filter_methods: when given, only method keys in this set are mined.
            filter_files: when given, only files whose new path is in this set
                are mined (old paths of renamed files are added on the fly).

        Returns:
            The number of commits analyzed.
        """
        methods = {}  # Dict[str, List[MinerBean]]
        commits_to_analyze = -1
        print('Mining: ' + self.repo_path)
        gr = GitRepository(self.repo_path)

        # Redefine start and stop commits
        print('Adjust start and stop commits.')
        first_commit = start_commit
        if start_commit is None:
            first_commit = gr.get_head().hash
        last_commit = stop_commit

        # Print start and stop commits info
        c1 = gr.get_commit(first_commit)
        print('Start: {} Author date: {} Committer date: {}'.format(
            c1.hash, c1.author_date, c1.committer_date))
        c2 = gr.get_commit(last_commit)
        print('Stop:  {} Author date: {} Committer date: {}'.format(
            c2.hash, c2.author_date, c2.committer_date))

        # Unnecessary in production: pre-count the commits only so progress
        # can be reported as "i/total" below.
        print('Retrieve commits to analyze.')
        commits = []
        for commit in RepositoryMining(self.repo_path,
                                       from_commit=first_commit,
                                       to_commit=last_commit,
                                       reversed_order=True).traverse_commits():
            commits.append(commit)
            print('{}) {} {}'.format(len(commits), commit.hash,
                                     commit.author_date))
        commits_to_analyze = len(commits)

        # Open CSV file and write header
        saver = Saver(self.csv_file)
        saver.create_csv_file()
        saver.print_csv_header()

        # Traverse commits and calculate metrics
        commit_count = 0
        for commit in RepositoryMining(self.repo_path,
                                       from_commit=first_commit,
                                       to_commit=last_commit,
                                       reversed_order=True).traverse_commits():
            # FIX: a membership test already yields a bool; the former
            # "True if ... else False" was redundant.
            buggy = commit.hash in self.bic_commits
            fix = commit.hash in self.fix_commits
            mod_analyzed_count = 0
            count_files_per_commit = len(commit.modifications)
            for mod in commit.modifications:
                # Filter out unnecessary files
                if filter_files is None or mod.new_path in filter_files:
                    if mod.filename.endswith(tuple(self.allowed_extensions)):
                        mod_analyzed_count += 1
                        # Update key entry on rename
                        if mod.change_type is ModificationType.RENAME:
                            methods = self.update_keys(methods, mod.new_path,
                                                       mod.old_path)
                            if filter_files is not None:
                                filter_files.add(mod.old_path)
                        count_methods_per_file = len(mod.methods)
                        for method in mod.methods:
                            key = self.get_unique_key(mod.new_path,
                                                      mod.old_path,
                                                      method.name)
                            # For unwanted keys prevent metric calculation
                            if filter_methods is None or key in filter_methods:
                                lines = mod.diff_parsed
                                method_metrics = MethodMetrics(
                                    mod.source_code, method.start_line,
                                    method.end_line, lines, buggy, fix)
                                m_touched = method_metrics.is_touched()
                                m_fix = method_metrics.is_fix()
                                m_buggy = method_metrics.is_buggy()
                                mb = MetricsBean(
                                    commit.hash, commit.author_date,
                                    mod.new_path, method.name,
                                    method.start_line, mod.change_type.name,
                                    count_files_per_commit, mod.added,
                                    mod.removed, mod.nloc, mod.complexity,
                                    mod.token_count, count_methods_per_file,
                                    method_metrics.get_added_lines(),
                                    method_metrics.get_removed_lines(),
                                    method.nloc, method.complexity,
                                    method.token_count, buggy, fix,
                                    method_metrics.get_number_of_lines(),
                                    method.fan_in, method.fan_out,
                                    method.general_fan_out,
                                    len(method.parameters),
                                    commit.author.email, m_touched, m_fix,
                                    m_buggy)
                                # FIX: append via setdefault; the former
                                # "methods.get(key, []).append(mb)" would
                                # append to a throwaway list if the key were
                                # ever missing.
                                methods.setdefault(key, []).append(mb)
                                # Going back in the past, ADD is the moment a
                                # file (hence a method) is created, so the key
                                # can be flushed to CSV to save RAM.
                                if mod.change_type is ModificationType.ADD:
                                    self.flush_methods(methods, key, saver)
            commit_count += 1
            print(
                'Methods: {:>8} | Commit {:>6}/{:<6} {} Date: {} Mods: {:>4}/{:<4} | Bug: {} Fix: {}'
                .format(len(methods), commit_count, commits_to_analyze,
                        commit.hash, commit.author_date.strftime('%d/%m/%Y'),
                        len(commit.modifications), mod_analyzed_count, buggy,
                        fix))
        # Flush whatever is still buffered in memory.
        for key, value in methods.items():
            saver.add_method_to_csv(key, value)
        saver.close_csv_file()
        print('Mining ended')
        return commit_count
Beispiel #9
0
 def get_commit_info(self, hash):
     """Look up *hash* via PyDriller and delegate to ``_get_commit_info``."""
     driller_repo = GitRepository(self.repo.path)
     driller_commit = driller_repo.get_commit(hash)
     return self._get_commit_info(driller_commit)
Beispiel #10
0
def info_criacao_client(request):
    """Django view: list the first LENGTH_INFO_CLIENT commits that exist only
    in the fork ("client") repository and not in the upstream ("vendor") one.

    GET renders a configuration-selection form; POST resolves the chosen
    configuration, diffs the two commit histories and renders the result.
    Note: templates are rendered with ``locals()``, so every local variable
    is exposed to the template context.
    """
    # Fetch the tool-execution configuration options registered in the database
    configuracaoferramenta_choices = ConfiguracaoFerramenta.objects.all(
    ).order_by('-id')
    configuracaoferramenta_choices_to_choicefield = list()
    for configuracao in configuracaoferramenta_choices:
        configuracaoferramenta_choices_to_choicefield.append(
            [configuracao.pk, configuracao])

    # On GET, build the blank form
    if request.method == 'GET':
        form = ExecutarFerramentaForm(
            configuracaoferramenta_choices_to_choicefield)
        title = 'Forkuptool - Módulo de análise de repositórios'
        subtitle = 'Selecione uma configuração para continuar'
        return render(request, 'info_criacao_client.html', locals())

    # On POST, process the submitted form data
    elif request.method == 'POST':
        configuracaoferramenta_escolhida = None

        if 'configuracaoferramenta_escolhida' in request.POST:
            configuracaoferramenta_escolhida = request.POST[
                'configuracaoferramenta_escolhida']

        if configuracaoferramenta_escolhida:
            # Fetch the configuration for the given id
            config = ConfiguracaoFerramenta.objects.get(
                pk=configuracaoferramenta_escolhida)

            repo_vendor = GitRepository(config.path_vendor)
            repo_client = GitRepository(config.path_auxiliary_files)
            commits_vendor = repo_vendor.get_list_commits()
            list_hash_vendor = []
            for c in commits_vendor:
                list_hash_vendor.append(c.hash)
            commits_client = repo_client.get_list_commits()
            list_hash_client = []
            for c in commits_client:
                list_hash_client.append(c.hash)
            # Collect up to LENGTH_INFO_CLIENT commit hashes that exist only
            # in the client repository.
            hash_n_primeiros_commits_somente_client = []
            n_primeiros_commits_somente_client = []
            for c in list_hash_client:
                if c not in list_hash_vendor:
                    hash_n_primeiros_commits_somente_client.append(c)
                    if len(hash_n_primeiros_commits_somente_client
                           ) == LENGTH_INFO_CLIENT:
                        break

            for h in hash_n_primeiros_commits_somente_client:
                commit_da_vez = repo_client.get_commit(h)
                # A merge commit with at least one vendor parent is a
                # candidate merge from the vendor repository.
                candidato_merge_vendor = False
                if commit_da_vez.merge:
                    tem_pai_vendor = False
                    tmp_parents = commit_da_vez.parents
                    for p in tmp_parents:
                        if p in list_hash_vendor:
                            tem_pai_vendor = True
                    if tem_pai_vendor:
                        candidato_merge_vendor = True

                info = {'author_date': commit_da_vez.author_date,\
                  'hash': commit_da_vez.hash,\
                  'parents': commit_da_vez.parents,\
                  'author_name': commit_da_vez.author.name,\
                  'merge': commit_da_vez.merge,\
                  'candidato_merge_vendor': candidato_merge_vendor,\
                  'msg': commit_da_vez.msg}
                n_primeiros_commits_somente_client.append(info)
                commit_da_vez = None
                info = None

            # Console summary of the client-only commits (debug aid)
            for c in n_primeiros_commits_somente_client:
                print(('{} ({}) - {} - {} - {}').format(
                    c['author_date'], c['hash'][0:7], c['author_name'],
                    c['merge'], c['msg'][0:80]))

            title = 'Forkuptool - Módulo de análise de repositórios'
            subtitle = 'Commits exclusivos do fork (repositório "client")'
            return render(request, 'info_criacao_client_show.html', locals())

        else:
            messages.error(request, 'Necessário informar uma configuração')
            return render(request, 'index.html', {
                'title': 'Forkuptool',
                'subtitle': 'Bem-vindo',
            })
from pydriller import GitRepository

repo = '/Users/luca/TUProjects/Salerno/jpacman-framework'

gr = GitRepository(repo)


def _print_commit_stats(commit):
    # Summarize a commit: file count, then per-file churn and complexity.
    print('Commit: {} changed {} files.'.format(commit.hash,
                                                len(commit.modifications)))
    for mod in commit.modifications:
        print('File: {} has {} additions and {} deletions'.format(
            mod.filename, mod.added, mod.removed))
        print('Complexity: {}. LOC {}'.format(mod.complexity, mod.nloc))


last_commit = gr.get_head()
_print_commit_stats(last_commit)

last_commit = gr.get_commit('51f041d')
_print_commit_stats(last_commit)
def modification(request):
    """Yield the first modification of the parametrized (path, commit) pair,
    then release the repository handle."""
    repo_path, commit_hash = request.param
    repo = GitRepository(repo_path)
    first_mod = repo.get_commit(commit_hash).modifications[0]
    yield first_mod
    repo.clear()
def miner():
    """Mine *repo_name*: extract fix commits and their bug-inducing commits
    (BIC) via SZZ, persist them to CSV/TXT files, and print statistics about
    fully/partially defective commits.

    Relies on module-level configuration: ``working_path``, ``repo_name``,
    ``repo_git``, ``keywords`` and ``extensions``.
    """
    repo_path = os.path.abspath(working_path + repo_name)
    # Clone if necessary
    if not os.path.exists(repo_path):
        print("Cloning: {}".format(repo_name))
        for c in RepositoryMining(repo_git,
                                  clone_repo_to=os.path.abspath(
                                      working_path)).traverse_commits():
            pass
    else:
        print("{} clone done!".format(repo_name))

    # Extract FIX and BIC
    bic_csv = os.path.abspath(working_path + repo_name + "_all.csv")
    header = [
        "hash", "path", "size", "developer", "type", "fix", "bic_path",
        "bic_hash", "bic_size"
    ]
    if not os.path.exists(bic_csv):
        print("Extracting FIX and BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        to_date = datetime(2017, 12, 1, 12, 0, 0)
        gr = GitRepository(repo_path)
        gr2 = GitRepository(repo_path)  # separate handle for BIC lookups
        for commit in RepositoryMining(
                repo_path,
                to=to_date,
                only_no_merge=True,
                only_modifications_with_file_types=extensions,
                reversed_order=True).traverse_commits():
            msg = commit.msg.lower()
            mods = commit.modifications
            # Treat as a fix commit only if it is small (< 50 files) and its
            # message contains a fix keyword.
            if len(mods) < 50 and any(word in msg for word in keywords):
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.committer.email,
                    "fix": True
                }
                for mod in mods:
                    dout["type"] = mod.change_type
                    if mod.change_type == ModificationType.DELETE:
                        dout["path"] = mod.old_path
                    else:
                        dout["path"] = mod.new_path
                    bics_per_mod = gr.get_commits_last_modified_lines(
                        commit, mod)
                    for bic_path, bic_commit_hashs in bics_per_mod.items():
                        dout["bic_path"] = bic_path
                        for bic_commit_hash in bic_commit_hashs:
                            bic = gr2.get_commit(bic_commit_hash)
                            dout["bic_hash"] = bic_commit_hash
                            dout["bic_size"] = len(bic.modifications)
                            writer.writerow(dout)
                            out_file.flush()
            else:
                dout = {
                    "hash": commit.hash,
                    "size": len(mods),
                    "developer": commit.committer.email,
                    "fix": False,
                    "bic_path": "---",
                    "bic_hash": "---",
                    "bic_size": "---"
                }
                for mod in mods:
                    dout["path"] = mod.new_path
                    writer.writerow(dout)
                    out_file.flush()
        out_file.close()
    else:
        print("Extracting FIX and BIC done!")

    # Get unique BIC
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    unique_devs = set()
    unique_commits = set()
    fixes = {}
    unique_bics = set()
    unique_fics = set()
    for row in reader:
        unique_commits.add(row["hash"])
        if row["path"].endswith(tuple(extensions)):
            unique_devs.add(row["developer"])
            unique_bics.add(row["bic_hash"])
            unique_fics.add(row["bic_path"])
            if row["fix"] == "True":
                fixes[row["hash"]] = True
    # FIX: discard() instead of remove() — remove() raises KeyError when no
    # "---" placeholder row made it into the sets.
    unique_bics.discard("---")
    unique_fics.discard("---")
    in_file.close()
    print("Developers: {}, Commits: {} Defective: {}".format(
        len(unique_devs), len(unique_commits), len(fixes)))

    # Save list of BIC
    unique_bic_txt = os.path.abspath(working_path + repo_name +
                                     "_unique_bic.txt")
    out_file = open(unique_bic_txt, 'w', newline='', encoding="utf-8")
    for bic in unique_bics:
        out_file.write(bic)
        out_file.write("\n")
    out_file.close()

    # Save list of FIX
    unique_fix_txt = os.path.abspath(working_path + repo_name +
                                     "_unique_fix.txt")
    out_file = open(unique_fix_txt, 'w', newline='', encoding="utf-8")
    for fix in fixes:
        out_file.write(fix)
        out_file.write("\n")
    out_file.close()

    # Count fully and partially defective commits, and defective files in defective commits
    bic_csv = os.path.abspath(working_path + repo_name + "_bic_metrics.csv")
    header = ["bic_hash", "bic_size", "bic_path", "defective"]
    if not os.path.exists(bic_csv):
        print("Counting partial BIC")
        out_file = open(bic_csv, 'w', newline='', encoding="utf-8")
        writer = csv.DictWriter(out_file, delimiter=',', fieldnames=header)
        writer.writeheader()
        gr = GitRepository(repo_path)
        for bic_hash in unique_bics:
            commit = gr.get_commit(bic_hash)
            diff = count_file = len(commit.modifications)
            dout = {
                "bic_hash": bic_hash,
                "bic_size": len(commit.modifications)
            }
            for mod in commit.modifications:
                if mod.filename.endswith(
                        tuple(extensions)
                ) and mod.change_type is not ModificationType.DELETE:
                    dout["bic_path"] = mod.new_path
                    if mod.new_path in unique_fics:
                        diff -= 1
                        dout["defective"] = True
                    else:
                        dout["defective"] = False
                    writer.writerow(dout)
                    out_file.flush()
                else:
                    count_file -= 1
                    diff -= 1
        out_file.close()
    else:
        print("Counting partial BIC done!")

    # Calculate partially defective commits
    in_file = open(bic_csv, 'r', newline='', encoding="utf-8")
    reader = csv.DictReader(in_file, delimiter=',')
    bics = {}
    fully_defective = partially_defective = 0
    partially_defective_files = total_defective_files = 0
    for row in reader:
        if row["bic_path"].endswith(tuple(extensions)):
            if row["bic_hash"] in bics:
                bics[row["bic_hash"]].append(row["defective"])
            else:
                bics[row["bic_hash"]] = [row["defective"]]
    for key, value in bics.items():
        # CSV values are strings, hence counting the literal "True".
        count_defective_files = value.count("True")
        if len(value) > 1:
            total_defective_files += count_defective_files
        if len(value) == count_defective_files:
            fully_defective += 1
        else:
            partially_defective += 1
            partially_defective_files += len(value) - count_defective_files
    # Guard the ratios against empty inputs.
    if total_defective_files != 0:
        ratio_defective_files_in_defective_commits = round(
            (partially_defective_files / total_defective_files) * 100, 1)
    else:
        ratio_defective_files_in_defective_commits = 0
    denominator = fully_defective + partially_defective
    if denominator != 0:
        ratio_partially_defective_commits = round(
            (partially_defective / denominator) * 100, 1)
    else:
        ratio_partially_defective_commits = 0
    print(
        "Partially def. commits: {}%. Defective files in partially def. commits: {}%"
        .format(ratio_partially_defective_commits,
                ratio_defective_files_in_defective_commits))
Beispiel #14
0
def main():
    """For every fix commit on the master branch, locate the latest
    bug-inducing commit via SZZ and dump the fixed and buggy versions of each
    touched file under ``data/fixed/<repo>`` and ``data/bug/<repo>``.

    Usage: ``script <repo_path>``
    """
    repo_path = sys.argv[1]
    repo_branch = 'master'
    base_dir_fixed = "data/fixed/{}".format(
        os.path.basename(os.path.normpath(repo_path)))
    base_dir_bug = "data/bug/{}".format(
        os.path.basename(os.path.normpath(repo_path)))

    if not os.path.exists(base_dir_bug):
        os.makedirs(base_dir_bug)
    if not os.path.exists(base_dir_fixed):
        os.makedirs(base_dir_fixed)

    gitRepo = GitRepository(repo_path)
    commits = RepositoryMining(repo_path,
                               only_in_branch=repo_branch).traverse_commits()

    i = 0

    for commit in commits:
        try:
            if not valid_commit(commit):
                continue
            i += 1

            fixed_files = []

            for m in commit.modifications:
                if not valid_modification(m):
                    continue

                # SZZ: map each modification to its bug-inducing commits.
                bug_commit = gitRepo.get_commits_last_modified_lines(
                    commit, m)

                if not bug_commit:
                    continue

                fixed_files.append(m.filename)
                fixed_file_name = "{}/{}_{}_{}".format(base_dir_fixed, str(i),
                                                       commit.hash[:6],
                                                       m.filename)

                for file in bug_commit:
                    if file.split('/')[-1] not in fixed_files:
                        continue

                    # Track the most recent bug-inducing commit for this file
                    # (start from a sentinel far in the past).
                    latest_bug_commit_date = utc.localize(
                        datetime.strptime("1/1/1950 00:00:00",
                                          "%d/%m/%Y %H:%M:%S"))
                    latest_bug_commit_hash = ""

                    for past_commit_hash in bug_commit[file]:
                        past_commit = gitRepo.get_commit(past_commit_hash)
                        past_commit_date = past_commit.committer_date.replace(
                            tzinfo=utc)
                        if past_commit_date > latest_bug_commit_date:
                            # FIX: keep the running maximum in the same
                            # (committer-date) timeline being compared; the
                            # original stored author_date here, which could
                            # corrupt the latest-commit search.
                            latest_bug_commit_date = past_commit_date
                            latest_bug_commit_hash = past_commit_hash

                    latest_bug_commit = gitRepo.get_commit(
                        latest_bug_commit_hash)

                    for bug_m in latest_bug_commit.modifications:
                        if bug_m.filename not in fixed_files:
                            continue

                        # FIX: identity comparison for None (was "== None").
                        if bug_m.source_code is None:
                            continue

                        bug_file_name = "{}/{}_{}_{}".format(
                            base_dir_bug, str(i), latest_bug_commit.hash[:6],
                            bug_m.filename)
                        with open(bug_file_name, 'w') as the_file:
                            the_file.write(bug_m.source_code)

                        with open(fixed_file_name, 'w') as the_file:
                            the_file.write(m.source_code)

                        print(i, commit.msg)
                        print(fixed_file_name)
                        print(bug_file_name)
                        print("********")

        except Exception as e:
            # Best-effort mining: report the failure and keep going.
            print("[***]", e)
            print("Continuing for next commits")

    print("All done")