Ejemplo n.º 1
0
 def __init__(self, repository: str, owner: str):
     """Create the repository handles used by this object.

     Args:
         repository: Location of the repository; it is handed unchanged to
             both ``GitRepository`` and ``RepositoryMining``.
         owner: Owner of the repository, stored as given.
     """
     self.owner = owner
     # normpath strips a trailing separator first; without it the last path
     # component -- and therefore the stored repository name -- is empty for
     # inputs such as "path/to/repo/".
     self.repository = os.path.basename(os.path.normpath(repository))
     self.repo = GitRepository(repository)
     self.mining = RepositoryMining(repository)
     self.pairs = []
     # Seeds the module-level random generator with a fixed value.
     random.seed(42)
Ejemplo n.º 2
0
    def update_has_impact_code(apps, schema_editor):
        """Recompute ``has_impact_loc`` for the modifications of project 2.

        Args:
            apps: Registry used to look up the ``Modification`` model of the
                ``contributions`` app.
            schema_editor: Not used by this function.
        """
        # The model is looked up through ``apps`` instead of being imported
        # directly, because the importable Modification model may be newer
        # than the version this migration expects.
        Modification = apps.get_model('contributions', 'Modification')

        def numbered_text(lines):
            # Each entry is read as (line_number, content); every rendered
            # line is "\n<number>  <content>" (two spaces in between).
            return "".join(
                "\n" + str(line[0]) + '  ' + line[1] for line in lines)

        for mod in Modification.objects.filter(commit__tag__project_id=2):
            git_repo = GitRepository(mod.commit.tag.project.project_path)
            diff_text = git_repo.parse_diff(mod.diff)

            added_text = numbered_text(diff_text['added'])
            deleted_text = numbered_text(diff_text['deleted'])

            added_uncommented_lines = detect_impact_loc(added_text)
            deleted_uncommented_lines = detect_impact_loc(deleted_text)
            mod.has_impact_loc = (added_uncommented_lines
                                  or deleted_uncommented_lines)

            mod.save()
Ejemplo n.º 3
0
    def get_NCOMM_NADEV_NNDEV_NSCTR(self, index: int, commits: List[MetricBean], beans: MyMetricBeans, my_commits: MyCommits) -> List[int]:
        """Compute four history counters for the file of ``commits[index]``.

        The commit ``commits[index].git_hash`` is loaded from the repository at
        ``self.repo_path``. For each of its modifications whose ``new_path``
        equals ``commits[index].new_path``, every commit that modified that
        path is visited.

        Args:
            index: Position of the commit of interest inside ``commits``.
            commits: Sequence of beans; only the entry at ``index`` is read.
            beans: Not used by the current implementation.
            my_commits: Not used by the current implementation.

        Returns:
            A four-element list:
            ``[commit_count, len(devs), len(set(devs)), len(packages)]`` where
            ``commit_count`` starts at 1 and is incremented once per visited
            commit, ``devs`` holds the author e-mail of every visited commit
            (duplicates kept) and ``packages`` holds the distinct directory
            prefixes of the files touched by the visited commits.
        """
        devs = []
        packages = set()
        target = commits[index]

        gr = GitRepository(self.repo_path)
        commit = gr.get_commit(target.git_hash)
        commit_count = 1
        for mod in commit.modifications:
            # Original author's note on this condition: "This must be changed".
            if mod.new_path is None or mod.new_path != target.new_path:
                continue
            for cmf in gr.get_commits_modified_file(mod.new_path):
                commit_count += 1
                c = gr.get_commit(cmf)
                devs.append(c.author.email)
                for m in c.modifications:
                    path = m.new_path if m.new_path is not None else m.old_path
                    if path.endswith(m.filename):
                        # Drop "<separator><filename>" to keep the directory.
                        packages.add(path[:-(len(m.filename) + 1)])
        return [commit_count, len(devs), len(set(devs)), len(packages)]
Ejemplo n.º 4
0
def ublame_cli(filename, patterns):
    """Run ``diff_commit`` on every commit that modified ``filename``.

    Args:
        filename: Path of the file to inspect; it is made absolute first.
        patterns: Passed through unchanged to ``diff_commit``.
    """
    filename = os.path.abspath(filename)
    repo_path = repo_path_for(filename)
    # relpath replaces the former ``filename.split(repo_path)[-1]``: splitting
    # returns the wrong tail when the repository path text occurs again inside
    # the file path, and returns the whole path when the two are normalised
    # differently (e.g. a trailing separator on repo_path).
    relative_filename = os.path.relpath(filename, repo_path)
    repo = GitRepository(repo_path)

    for commit_hash in repo.get_commits_modified_file(relative_filename):
        diff_commit(repo.get_commit(commit_hash), patterns)
Ejemplo n.º 5
0
def _removed_one_parameter(old_modifier, new_modifier, old_params, new_params):
    """Tell whether the new signature is the old one minus exactly one parameter.

    The modifiers must be equal, the old parameter list must be exactly one
    element longer than the new one, and every new parameter must also occur
    in the old list.
    """
    return (old_modifier == new_modifier
            and len(old_params) == len(new_params) + 1
            and all(param in old_params for param in new_params))


def main():
    """Report methods that lost a parameter, for every commit of a repository.

    The repository path is taken from ``sys.argv[1]``. Each modification of
    each commit is parsed, the added and deleted method declarations are
    paired up, and every pair where the added declaration has one parameter
    fewer is written to ``report.csv`` in the current working directory.
    """
    print('Running...')
    # Collected (commit hash, file name, old signature, new signature) rows.
    report_data = []
    # Get the repo path from command line arguments
    path = sys.argv[1]

    # Analyse the commits in the repo
    git_repo = GitRepository(path)
    for commit in git_repo.get_list_commits():
        for mod in commit.modifications:
            diffs = git_repo.parse_diff(mod.diff)
            # Get method info from the modification detail
            (add_func_name, add_func_sig, add_func_modifier,
             add_func_mod_row, add_func_params) = get_func_info(diffs, 'added')
            (del_func_name, del_func_sig, del_func_modifier,
             del_func_mod_row, del_func_params) = get_func_info(diffs, 'deleted')

            # Find methods that have removed a parameter.
            # Overloading in Java means one name can occur several times.
            # 1 - added method number <= deleted method number
            for add_idx, add_func in enumerate(add_func_name):
                if add_func not in del_func_name:
                    continue
                if add_func_name.count(add_func) > del_func_name.count(add_func):
                    continue
                # Find the deleted method that is near the added method
                del_idx = find_mod_func_pair(add_func_mod_row[add_idx], add_func,
                                             del_func_name, del_func_mod_row)
                if _removed_one_parameter(del_func_modifier[del_idx],
                                          add_func_modifier[add_idx],
                                          del_func_params[del_idx],
                                          add_func_params[add_idx]):
                    old_sig = add_func + del_func_sig[del_idx]
                    new_sig = add_func + add_func_sig[add_idx]
                    report_data.append(
                        (commit.hash, mod.filename, old_sig, new_sig))

            # 2 - added method number > deleted method number
            for del_idx, del_func in enumerate(del_func_name):
                if del_func not in add_func_name:
                    continue
                if add_func_name.count(del_func) <= del_func_name.count(del_func):
                    continue
                add_idx = find_mod_func_pair(del_func_mod_row[del_idx], del_func,
                                             add_func_name, add_func_mod_row)
                if _removed_one_parameter(del_func_modifier[del_idx],
                                          add_func_modifier[add_idx],
                                          del_func_params[del_idx],
                                          add_func_params[add_idx]):
                    old_sig = del_func + del_func_sig[del_idx]
                    new_sig = del_func + add_func_sig[add_idx]
                    report_data.append(
                        (commit.hash, mod.filename, old_sig, new_sig))

    # Drop duplicate rows while keeping first-seen order, so the report is
    # identical from run to run (a plain set gives an arbitrary row order).
    unique_rows = list(dict.fromkeys(report_data))
    header = ['Commit SHA', 'Java File', 'Old function signature', 'New function signature']
    # Save the report data to a CSV
    with open('report.csv', 'w', newline='') as resultFile:
        wr = csv.writer(resultFile, dialect='excel')
        wr.writerow(header)
        wr.writerows(unique_rows)
    print('Finish!')
Ejemplo n.º 6
0
def main():
    """Clone each listed repository and record its commit information.

    For every ``(id, url, model_files, hash)`` row returned by
    ``get_repo_id_urls`` that is neither in the skip list nor already
    processed, the repository is cloned into the ``workdir`` directory,
    checked out at ``hash`` and passed to ``write_project_commit_info`` and
    ``write_model_commit_info``. The working directory is removed after each
    attempt, and the total run time is logged at the end.
    """
    # Local import keeps this block self-contained.
    import subprocess

    start = time.time()
    source_database = ""
    dst_database = ""
    path = "workdir"
    # Repository URLs that are never processed.
    skipped_urls = {
        "https://github.com/alesgraz/kinect2-SDK-for-Simulink",
        "https://github.com/OpenCadd/Lego_nxt_car",
        "https://github.com/StefanMack/ProjSensSys",
        "https://github.com/chiloanel/UWMatlab",
    }
    dest_project_database_controller = Project_commits_info_Controller(
        dst_database)
    dest_model_database_controller = Model_commits_info_Controller(
        dst_database)

    project_verbatim = Project_commits_verbatim_Controller(dst_database)
    model_verbatim = Model_commits_verbatim_Controller(dst_database)
    # create a database connection
    conn = create_connection(source_database)
    dst_conn = create_connection(dst_database)
    with dst_conn:
        processed_id, _processed_mdl_name = get_id_name(dst_conn)
    with conn:
        for repo_id, url, model_files, commit_hash in get_repo_id_urls(conn):
            if not os.path.exists(path):
                os.mkdir(path)
            if url in skipped_urls:
                continue
            try:
                if repo_id not in processed_id:
                    # Argument list instead of a shell string: the URL comes
                    # from the database and must not be interpreted by a
                    # shell. "--" stops git from reading it as an option.
                    subprocess.run(["git", "clone", "--", url, path],
                                   check=True)  # Cloning
                    gr = GitRepository(path)
                    gr.checkout(commit_hash)
                    project_lifetime = write_project_commit_info(
                        path, repo_id, commit_hash,
                        dest_project_database_controller, project_verbatim)
                    write_model_commit_info(model_files, path, repo_id,
                                            commit_hash,
                                            dest_model_database_controller,
                                            project_lifetime, model_verbatim)
                else:
                    logging.info("Skipping . ALready Processed {}".format(repo_id))
            except Exception as e:
                # One failing repository must not stop the whole run.
                logging.error(e)
            finally:
                # Always start the next repository from a clean directory.
                shutil.rmtree(path)
    end = time.time()
    logging.info("IT took {} seconds".format(end - start))
Ejemplo n.º 7
0
def main():
    """Sort the files of a repository into "bug" and "not_bug" folders.

    The repository path is read from ``sys.argv[1]``. Every file accepted by
    ``valid_source_file`` is copied to ``data2/bug/<repo>`` when at least one
    commit touching it has a message accepted by ``is_buggy_commit``, and to
    ``data2/not_bug/<repo>`` otherwise. Copies are named
    ``<counter>_<basename>``, where the counter is the 1-based position of
    the file in the repository listing.
    """
    repo_path = sys.argv[1]
    repo_name = os.path.basename(os.path.normpath(repo_path))

    base_dir_not_bug = "data2/not_bug/{}".format(repo_name)
    base_dir_bug = "data2/bug/{}".format(repo_name)
    os.makedirs(base_dir_bug, exist_ok=True)
    os.makedirs(base_dir_not_bug, exist_ok=True)

    gitRepo = GitRepository(repo_path)
    # The same repo object serves every file, so fetch it once.
    gprepo = gitRepo.repo

    for counter, file_path in enumerate(gitRepo.files(), start=1):
        relative_path = os.path.relpath(file_path, repo_path)
        print(relative_path)

        if not valid_source_file(file_path):
            continue

        buggy = False
        for commit in gprepo.iter_commits('--all', paths=relative_path):
            print(commit.hexsha)
            if is_buggy_commit(commit.message):
                print(commit.message)
                buggy = True
                break

        target_dir = base_dir_bug if buggy else base_dir_not_bug
        file_name_to_write = "{}_{}".format(counter,
                                            os.path.basename(file_path))
        shutil.copyfile(file_path,
                        os.path.join(target_dir, file_name_to_write))

    print("All Done!")
Ejemplo n.º 8
0
    def checkout_all_commit(self, repo_cfg: dict):
        """Check out every reported commit and run all detectors on it.

        The commit list comes from the CSV file
        ``cfg['paths']['commit_report'] + repo_cfg['commit_file']`` and is
        visited in ascending ``author_date`` order.

        Args:
            repo_cfg: Repository settings; ``name`` and ``commit_file`` are
                read, and ``commit`` is overwritten with the hash currently
                checked out before the detectors run.
        """
        paths = self.cfg['paths']
        gr = GitRepository(path=paths['repo'] + repo_cfg['name'])

        report = pd.read_csv(paths['commit_report'] + repo_cfg['commit_file'],
                             header=0)
        ordered = report.sort_values(by=['author_date'], ascending=True)

        for commit_hash in ordered['hash'].tolist():
            gr.checkout(commit_hash)
            repo_cfg['commit'] = commit_hash
            for detector in self.detectors:
                detector.run_on(repo_cfg)
Ejemplo n.º 9
0
    def setUp(self):
        """Build the dataset, repository description and git handle fixtures."""
        dataset_name = "cassandra20200615"
        repositories = [{
            "name": "cassandra20200615",
            "url": "",
            "CommitTarget": "",
            "filterFile": "",
            "codeIssueJira": "",
            "projectJira": ""
        }]
        option = Option({
            "name": dataset_name,
            "mode": "train",
            "repositories": repositories,
            # Original note: the parameters are not needed for inference.
            "parameters": {}
        })

        def repository_dir():
            # <test root>/testDataset/<repository name>/repository
            return os.path.join(UtilPath.Test(), "testDataset",
                                self.repository["name"], "repository")

        self.dataset = Dataset(option.getRepositorieImproved())
        self.repository = repositories[0]
        print(repository_dir())
        self.gr = GitRepository(repository_dir())
Ejemplo n.º 10
0
def init(path=os.curdir):
    """Initialize enguard in a git repository.

    Args:
        path: Location handed to ``GitRepository``; defaults to the current
            directory.
    """
    git_repo = GitRepository(path)
    file_io = FileIO()

    # Both steps work on the repository path and share one FileIO object.
    for setup_step in (init_config, install_hooks):
        setup_step(git_repo.path, file_io)
Ejemplo n.º 11
0
def register_git_projects(conf: ConfigParser = None) -> None:
    """Register every project described by a ``project.*`` config section.

    A section without ``local_path`` is treated as remote: every project of
    its GitLab ``group`` (subgroups included) is registered. Otherwise each
    path matching ``<local_path>/<filter>`` that is a git repository with at
    least one commit is registered as a local repository.

    Args:
        conf: Configuration to read; when falsy, the crawler configuration
            is used instead.
    """
    if not conf:
        conf = crawler_config().conf
    gl = gitlab_api()

    section_names = [s for s in conf.sections() if s.startswith("project.")]
    for key in section_names:
        section = conf[key]
        group = section.get("group")
        local_path = section.get("local_path")
        if local_path and local_path.startswith("~"):
            local_path = expanduser(local_path)
        project_type = section.get("type", "MISC")
        pattern = section.get("filter", "*")

        if local_path is None:
            # remote project, get project info from gitlab
            try:
                remote_projects = gl.groups.get(group).projects.list(
                    include_subgroups=True, as_list=False
                )
                for proj in remote_projects:
                    register_remote_repository(proj, project_type)
            except GitlabGetError as e:
                current_app.logger.info(f"gitlab search {group} has some error {e}")
            continue

        for path in glob.glob(f"{local_path}/{pattern}", recursive=True):
            try:
                has_commits = GitRepository(path).total_commits() > 0
                if has_commits:
                    register_local_repository(path, project_type)
            except InvalidGitRepositoryError:
                current_app.logger.info(f"skipping non Git path {path}")
            except Exception:
                current_app.logger.info(f"skipping invalid repository path {path}")
Ejemplo n.º 12
0
def create_some_commit(repo_path: str, file_name: str = "dummy.txt") -> None:
    """Write a one-line file into the repository and commit it.

    Args:
        repo_path: Path of the repository.
        file_name: Name of the file to (over)write inside ``repo_path``.
    """
    git_repo = GitRepository(repo_path).repo
    target = "/".join((repo_path, file_name))
    with open(target, "w") as handle:
        handle.write("something\n")
    git_repo.index.add(target)
    git_repo.index.commit("some commit")
Ejemplo n.º 13
0
    def prepare(self):
        """Collect the data of every matching ``.mjava`` file into ``self.dataset``.

        For each entry of ``self.repositories`` the ``*.mjava`` files below
        ``repository["path"]`` whose path matches the ``filterFile`` pattern
        are turned into ``Data(...).getData()`` results, with a progress bar
        per repository.
        """
        for repository in self.repositories:
            datasetRepository = []
            # TODO: a repository path that does not exist is not handled; the
            # original only had an empty placeholder for that case.
            gr = GitRepository(repository["path"])
            # Compile the filter once instead of once per candidate file.
            file_filter = re.compile(repository["filterFile"])
            pathsFile = [
                pathFile
                for pathFile in glob.glob(repository["path"] + "/**/*.mjava",
                                          recursive=True)
                if file_filter.match(pathFile)
            ]
            commitsBug = self.getCommitsBug(repository)

            with tqdm(
                    pathsFile,
                    bar_format=
                    "{desc}: {percentage:3.0f}%|{bar:10}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]"
            ) as pbar:
                for pathFile in pbar:
                    # Show the repository name and the current file name.
                    pbar.postfix = os.path.basename(pathFile)
                    pbar.desc = repository["name"]
                    datasetRepository.append(
                        Data(gr, pathFile, commitsBug).getData())
            self.dataset.extend(datasetRepository)
Ejemplo n.º 14
0
def test_tabs():
    """Tab-indented and empty added lines keep their line numbers and content."""
    diff_lines = ['@@ -1,4 +1,17 @@', ' a', ' b', '-c', '+\td', '+cc']
    diff_lines += ['+'] * 9  # nine empty added lines
    diff_lines += ['+\tg', '+', '+j', ' ']
    diff = '\r\n'.join(diff_lines)

    parsed_lines = GitRepository('test-repos/test1').parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']

    assert (3, 'c') in deleted
    assert 1 == len(deleted)

    expected_added = [(3, '\td'), (4, 'cc')]
    expected_added += [(line_no, '') for line_no in range(5, 14)]
    expected_added += [(14, '\tg'), (15, ''), (16, 'j')]
    for entry in expected_added:
        assert entry in added
    assert 14 == len(added)
Ejemplo n.º 15
0
 def get_SCTR(self, index: int, commits: List[MetricBean], beans: MyMetricBeans, my_commits: MyCommits) -> int:
     """Count the distinct directories touched by the commit at ``index``.

     The commit ``commits[index].git_hash`` is loaded from the repository at
     ``self.repo_path``. Its modifications are only inspected when the
     committer e-mail stored in the bean equals the one of the loaded commit.

     Args:
         index: Position of the commit of interest inside ``commits``.
         commits: Sequence of beans; only the entry at ``index`` is read.
         beans: Not used by the current implementation.
         my_commits: Not used by the current implementation.

     Returns:
         The number of distinct directory prefixes among the modified files,
         or 0 when the committer e-mails differ.
     """
     target = commits[index]
     commit = GitRepository(self.repo_path).get_commit(target.git_hash)
     if target.committer_email != commit.committer.email:
         return 0

     packages = set()
     for mod in commit.modifications:
         path = mod.old_path if mod.new_path is None else mod.new_path
         if not path.endswith(mod.filename):
             continue
         # Drop "<separator><filename>" to keep only the directory part.
         packages.add(path[:-(len(mod.filename) + 1)])
     return len(packages)
Ejemplo n.º 16
0
    async def _update_commits(
        self,
        git_repo: GitRepository,
        current_commits: MutableMapping,
        checkout_point: str,
    ) -> None:
        """Add the commits of a branch that are not yet in ``current_commits``.

        Args:
            git_repo: Repository whose commits are listed.
            current_commits: Mapping from commit hash to commit; updated in
                place, existing entries are left untouched.
            checkout_point: Passed as ``branch`` to ``get_list_commits``.
        """
        branch_commits = git_repo.get_list_commits(branch=checkout_point)
        for commit in branch_commits:
            if commit.hash in current_commits:
                continue
            current_commits[commit.hash] = commit
Ejemplo n.º 17
0
def test_deletions():
    """A single removed line is reported with its old line number."""
    diff = '\r\n'.join([
        '@@ -2,6 +2,7 @@ aa',
        ' bb',
        ' cc',
        ' log.info("aa")',
        '-log.debug("b")',
        ' dd',
        ' ee',
        ' ff',
    ])

    parsed_lines = GitRepository('test-repos/test1').parse_diff(diff)
    deleted = parsed_lines['deleted']
    added = parsed_lines['added']

    assert (5, 'log.debug("b")') in deleted
    assert len(deleted) == 1
    assert len(added) == 0
Ejemplo n.º 18
0
    def __init__(self,
                 repoURL,
                 first=None,
                 second=None,
                 fromCommit=None,
                 since=None,
                 to=None):
        """Create the miner, select the commit range and run the analysis.

        The commit range is selected by the first matching rule:

        * ``first`` and ``second`` only: commits between the two.
        * ``first`` only: that single commit.
        * ``since`` and ``to`` only: commits between the two parsed dates.
        * ``fromCommit``: commits starting from that commit.
        * anything else: all commits.

        Args:
            repoURL: Repository location handed to ``GitRepository`` and
                ``RepositoryMining``.
            first: First commit of a range, or the single commit to analyse.
            second: Last commit of a range.
            fromCommit: Commit to start from.
            since: Start date as a string understood by ``parser.parse``.
            to: End date as a string understood by ``parser.parse``.

        Raises:
            Exception: If ``since`` or ``to`` cannot be parsed as a date; the
                parsing error is attached as the cause.
        """
        start = time.perf_counter()
        self.__gitRepo = GitRepository(repoURL)

        if first is not None and second is not None and since is None and to is None:
            self.repo = RepositoryMining(repoURL,
                                         from_commit=first,
                                         to_commit=second)
            self.__repo_type = RepoType.BETWEEN_COMMITS
        elif first is not None and second is None and since is None and to is None:
            self.repo = RepositoryMining(repoURL, single=first)
            self.__repo_type = RepoType.SINGLE_COMMIT
        elif first is None and second is None and since is not None and to is not None:
            # Only the date parsing is guarded, so an unrelated failure while
            # building the miner is not misreported as an invalid date.
            try:
                date1 = parser.parse(since)
                date2 = parser.parse(to)
            except Exception as err:
                raise Exception("Entered Datetime is not valid.") from err
            self.repo = RepositoryMining(repoURL, since=date1, to=date2)
            self.__repo_type = RepoType.DATETIME
        elif fromCommit is not None:
            self.repo = RepositoryMining(path_to_repo=repoURL,
                                         from_commit=fromCommit)
            self.__repo_type = RepoType.FROM_COMMIT
        else:
            self.repo = RepositoryMining(path_to_repo=repoURL)
            self.__repo_type = RepoType.ALL

        print("repoMiner was created")

        self.__files = []  # number of analyzed files
        self.__files_with_methods = []
        self.__test_files = []  # number of test files
        self.__production_files = []  # number of production files
        self.__commits = []  # List[str] of analysed commits hash
        # hashes of analysed commits with modified methods
        self.__commits_with_modified_methods = set()
        self.__production_methods = []  # List[ModifiedMethods]
        self.__test_methods = []  # List[ModifiedMethods]
        self.__modified_methods = []  # List[ModifiedMethods]
        self.__moved_files_without_changes = []  # List of files without changes
        self.__analyzed_commits = []  # List[AnalyzedCommits]
        self.__matched_files = []  # List of matched files
        self.__not_matched_files = None  # instance of NotMatchedFiles
        self.__GetModifications()  # performs analysis
        end = time.perf_counter()
        # analysis duration in minutes, formatted with two decimals
        self.__analyse_time = "{:.2f}".format((end - start) / 60)
Ejemplo n.º 19
0
def find_occurence_in_commit(commit, word, file):
    """Log the source lines of a commit that contain ``word``.

    For every modification whose source code contains ``word`` a date header
    is written to ``file``, followed by each matching source line, once with
    the "ligne ajouté" label and once with the "ligne retiré" label. The
    paths of those modifications are written after the last modification.

    Args:
        commit: Commit whose ``modifications`` are inspected.
        word: Text to look for.
        file: Writable text stream receiving the report.

    Returns:
        The accumulated number of added plus deleted lines reported by
        ``parse_diff`` over all matching modifications.
    """
    total_lines = 0
    paths_with_word = []

    for m in commit.modifications:
        source = str(m.source_code)
        if word not in source:
            continue

        file.write("************** date : " + str(commit.committer_date) +
                   "*****************\n")
        # NOTE(review): the search word itself -- not the modification's diff
        # -- is what gets parsed here, and the repository path is fixed to
        # 'test-repos/test1'. Behaviour kept as is; confirm this is intended.
        parsed_lines = GitRepository('test-repos/test1').parse_diff(word)

        total_lines += len(parsed_lines['added'])
        matching_lines = [
            line for line in source.splitlines() if line.find(word) != -1
        ]
        paths_with_word.append(m.new_path)
        for line in matching_lines:
            file.write("\t\tligne ajouté : {}\n".format(line))

        total_lines += len(parsed_lines['deleted'])
        for line in matching_lines:
            file.write("\t\tligne retiré : {}\n".format(line))

    if paths_with_word:
        file.write(str(paths_with_word) + "\n\n")

    return total_lines
Ejemplo n.º 20
0
def test_real_example():
    """A three-hunk diff yields the right line numbers for every change."""
    diff = '\r\n'.join([
        '@@ -72,7 +72,7 @@ public class GitRepository implements SCM {',
        ' ',
        '        private static Logger log = Logger.getLogger(GitRepository.class);',
        ' ',
        '-       public GitRepository(String path) {',
        '+       public GitRepository2(String path) {',
        '                this.path = path;',
        '                this.maxNumberFilesInACommit = checkMaxNumberOfFiles();',
        '                this.maxSizeOfDiff = checkMaxSizeOfDiff();',
        '@@ -155,7 +155,7 @@ public class GitRepository implements SCM {',
        '                return git.getRepository().getBranch();',
        '        }',
        ' ',
        '-       public ChangeSet getHead() {',
        '+       public ChangeSet getHead2() {',
        '                Git git = null;',
        '                try {',
        '                        git = openRepository();',
        '@@ -320,6 +320,7 @@ public class GitRepository implements SCM {',
        ' ',
        '                return diffs;',
        '        }',
        '+       newline',
        ' ',
        '        private void setContext(DiffFormatter df) {',
        '                String context = System.getProperty("git.diffcontext");',
    ])

    parsed_lines = GitRepository('test-repos/test1').parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']

    expected_deleted = [
        (75, '       public GitRepository(String path) {'),
        (158, '       public ChangeSet getHead() {'),
    ]
    for entry in expected_deleted:
        assert entry in deleted
    assert 2 == len(deleted)

    expected_added = [
        (75, '       public GitRepository2(String path) {'),
        (158, '       public ChangeSet getHead2() {'),
        (323, '       newline'),
    ]
    for entry in expected_added:
        assert entry in added
    assert 3 == len(added)
Ejemplo n.º 21
0
def test_ignore_add_whitespaces_and_modified_normal_line():
    """``skip_whitespaces`` drops the whitespace-only changes from the diff."""
    repo_path = 'test-repos/test14'
    commit_hash = "52716ef1f11e07308b5df1b313aec5496d5e91ce"
    gr = GitRepository(repo_path)

    def parse_single_modification(**mining_options):
        # Mine the one commit with the given options and parse the diff of
        # its only modification.
        mined = list(
            RepositoryMining(repo_path, single=commit_hash,
                             **mining_options).traverse_commits())
        modifications = mined[0].modifications
        assert len(modifications) == 1
        return gr.parse_diff(modifications[0].diff)

    parsed_normal_diff = parse_single_modification()
    parsed_wo_whitespaces_diff = parse_single_modification(
        skip_whitespaces=True)

    assert len(parsed_normal_diff['added']) == 2
    assert len(parsed_wo_whitespaces_diff['added']) == 1

    assert len(parsed_normal_diff['deleted']) == 1
    assert len(parsed_wo_whitespaces_diff['deleted']) == 0
Ejemplo n.º 22
0
def test_diff_no_newline():
    """
    If a file ends without a newline git represents this with the additional line
        \\ No newline at end of file
    in diffs. This test asserts these additional lines are parsed correctly.
    """
    gr = GitRepository('test-repos/no_newline')
    # try/finally: the repository handle is cleared even when an assertion
    # fails, instead of only on success.
    try:
        mod = gr.get_commit(
            '52a78c1ee5d100528eccba0a3d67371dbd22d898').modifications[0]
        added = mod.diff_parsed['added']
        deleted = mod.diff_parsed['deleted']

        # is considered as deleted as a 'newline' command is added
        assert (1, 'test1') in deleted
        assert (1, 'test1') in added  # now with added 'newline'
        assert (2, 'test2') in added
    finally:
        gr.clear()
Ejemplo n.º 23
0
def run_for_one_project(db_name, project_name, repo_path, thread_id=0):
    """Store the bug-fix commits of one project and the commits that induced them.

    Every commit of ``<repo_path>/<project_name>`` that modifies ``.java``
    files is inspected. When its lower-cased message contains one of the
    ``bugs`` keywords, the bug fix, its modified ``.java`` files and the
    commits that last modified the changed lines are written to the database.

    Args:
        db_name: Name of the database to use.
        project_name: Directory name of the project below ``repo_path``.
        repo_path: Directory that contains the project.
        thread_id: Label used in the progress output only.
    """
    project_path = '{}/{}'.format(repo_path, project_name)
    db_obj = db.DB()
    db_obj.set_db_name(db_name)
    cursor, conn = db_obj.connect_mysql()
    try:
        gr = GitRepository(project_path)
        totalCommits = gr.total_commits()
        count = 0
        try:
            for commit in RepositoryMining(
                    project_path,
                    only_modifications_with_file_types=['.java']).traverse_commits():
                msg = commit.msg.lower()  # convert the commit message to lower case
                for key in bugs:
                    if key not in msg:
                        continue
                    bugfix = bug.BugFix(db_obj, cursor, conn, commit.hash,
                                        project_name, msg, key,
                                        commit.committer_date,
                                        commit.author.name, commit.author.email)
                    bugfix.insert_into_database()  # insert bugfix
                    for modified_file in commit.modifications:
                        if not modified_file.filename.endswith('.java'):
                            continue
                        churn = modified_file.added + modified_file.removed
                        bug_fix_file = bug.BugFixFile(db_obj, cursor, conn,
                                                      commit.hash,
                                                      modified_file.new_path,
                                                      churn)
                        bug_induce_commits = gr.get_commits_last_modified_lines(
                            commit, modified_file)
                        induce_commits = bug_induce_commits.get(
                            modified_file.new_path)
                        bugfix.set_induce_commits(induce_commits)
                        # at this point you can insert bug fix and modified files
                        bugfix.insert_into_bug_fix_induce()  # insert bug fix induce
                        bug_fix_file.insert_into_database()  # insert bug fix file
                        if induce_commits is None:
                            print("no induce commits found")
                            continue
                        try:
                            for ind_commit in induce_commits:
                                getDetailsOfInduceCommit(db_obj, cursor, conn,
                                                         project_name, gr,
                                                         ind_commit)
                        except Exception as exc:
                            # Best effort: keep going with the next file, but
                            # report the real error instead of claiming that
                            # no induce commits exist.
                            print("failed to process induce commits: {}".format(exc))
                    # Only the first matching keyword is recorded per commit.
                    break

                count = count + 1
                if count % 100 == 0:
                    print("Thread {}: Done processing: {} {}/{}".format(
                        thread_id, commit.hash, count, totalCommits))
        except Exception as exc:
            # Abort this project but say why; KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print("Exception occured: {}".format(exc))
    finally:
        # Release the connection on every path, not only on success.
        db_obj.close_connection(conn)
Ejemplo n.º 24
0
def computeRepoMetrics(repoPath):
    """Compute the metrics of one repository.

    Args:
        repoPath: Path of the repository; also used in the start/end messages.
    """
    print("STARTED:" + repoPath)
    repo = GitRepository(repoPath)
    # Per the original notes: analyzeCommits collects the other related
    # metrics for the interval.
    commit_metrics = analyzeCommits(repoPath, repo)
    # Per the original notes: this yields file pairs with their semantic and
    # structural differences for the entire repo.
    scattering = calculateStructAndSemanticScattering(repo)
    # The two results are then processed together.
    parallelMetricProcessing(commit_metrics, scattering, 30, repo)
    print("COMPLETED:" + repoPath)
Ejemplo n.º 25
0
def test_diff_histogram():
    """The same commit parses differently with and without ``histogram_diff``."""
    repo_path = 'test-repos/test13'
    commit_hash = "93df8676e6fab70d9677e94fd0f6b17db095e890"

    def parse_first_modification(**mining_options):
        # Mine the single commit with the given options and parse the diff
        # of its first modification.
        commit = list(
            RepositoryMining(repo_path, single=commit_hash,
                             **mining_options).traverse_commits())[0]
        mod = commit.modifications[0]
        return GitRepository(repo_path).parse_diff(mod.diff)

    # without histogram
    diff = parse_first_modification()
    assert len(diff['added']) == 11
    for entry in [
        (3, '    if (path == null)'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (8, ''),
        (9, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (10, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (14, '        return null;'),
        (16, '    else'),
        (17, '        return new ImageIcon(imgURL);'),
    ]:
        assert entry in diff['added']

    assert len(diff['deleted']) == 7
    for entry in [
        (3, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (4, ''),
        (5, '    if (imgURL != null)'),
        (7, '        return new ImageIcon(imgURL);'),
        (9, '    else'),
        (10, '    {'),
        (13, '    return null;'),
    ]:
        assert entry in diff['deleted']

    # with histogram
    diff = parse_first_modification(histogram_diff=True)
    for entry in [
        (4, '    {'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (7, '    }'),
        (8, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (13, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (14, '        return null;'),
        (17, '        return new ImageIcon(imgURL);'),
    ]:
        assert entry in diff["added"]

    for entry in [
        (6, '    {'),
        (7, '        return new ImageIcon(imgURL);'),
        (10, '    {'),
        (11, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (12, '    }'),
        (13, '    return null;'),
    ]:
        assert entry in diff["deleted"]
Ejemplo n.º 26
0
    def checkout_refactored_commit(self, repo_cfg: dict, call_back: callable):
        """Check out every refactoring-related commit and run ``call_back`` on it.

        The commits come from the ``commit`` and ``previous`` columns of
        ``<commit_report>/<name>_refactored.csv``. Commits that already have
        an output file in ``<smell_report>/<name>`` are skipped, so an
        interrupted run can be resumed.

        Args:
            repo_cfg: Repository settings; ``name`` is read and ``commit`` is
                overwritten with the hash currently checked out.
            call_back: Called with ``repo_cfg`` after each checkout.
        """
        gr = GitRepository(path=self.cfg['paths']['repo'] + repo_cfg['name'])
        try:
            gr.reset()
        except Exception as exc:
            # Best effort: a failed reset must not prevent the run.
            print(f"\t reset failed: {exc}")

        df = pd.read_csv(self.cfg['paths']['commit_report'] +
                         repo_cfg['name'] + "_refactored.csv",
                         header=0)
        commits = df['commit'].tolist()
        previous = df['previous'].tolist()
        finished_commit = set()

        # resume if output exist
        for file in os.listdir(self.cfg['paths']['smell_report'] +
                               repo_cfg['name']):
            if re.match(r'\d+\_\w+\_\w+\.csv', file):
                # ``\w`` also matches "_", so the middle field may contain
                # underscores; the commit is always the last "_" field.
                # A three-way unpack of split('_') raised ValueError there.
                commit = file.rsplit('_', 1)[-1]
                finished_commit.add(commit.replace('.csv', ''))

        # pd.notna filters missing values by value, not by object identity
        # with np.nan.
        unique_commit = {c for c in commits + previous if pd.notna(c)}
        unique_commit -= finished_commit

        cpt_total = len(unique_commit)
        for cpt, commit in enumerate(unique_commit, start=1):
            print(f"{commit}\t\t\t{repo_cfg['name']}\t{cpt}/{cpt_total}")
            gr.checkout(commit)
            print('\t checkout -done!')
            repo_cfg['commit'] = commit
            call_back(repo_cfg)
Ejemplo n.º 27
0
def test_extract_line_number_and_content():
    """parse_diff must report each removed/added line with its line number.

    The hunk replaces seven lines (numbered from 1) with seven new ones and
    uses CRLF line endings.
    """
    removed = ['a', 'b', 'c', 'log.info("a")', 'd', 'e', 'f']
    inserted = ['aa', 'bb', 'cc', 'log.info("aa")', 'dd', 'ee', 'ff']

    hunk_lines = ['@@ -1,8 +1,8 @@']
    hunk_lines += ['-' + text for text in removed]
    hunk_lines += ['+' + text for text in inserted]
    # Every line is CRLF-terminated and the hunk ends with a lone space.
    diff = ''.join(line + '\r\n' for line in hunk_lines) + ' '

    parsed_lines = GitRepository('test-repos/test1').parse_diff(diff)
    deleted = parsed_lines['deleted']
    added = parsed_lines['added']

    for line_number, text in enumerate(removed, start=1):
        assert (line_number, text) in deleted

    for line_number, text in enumerate(inserted, start=1):
        assert (line_number, text) in added
Ejemplo n.º 28
0
 def __get_bug_introducing_commits(self, path_to_repo, from_commit, to_commit):
     """Collect, per file, the commits blamed for lines changed by bug fixes.

     Traverses the commits from ``from_commit`` to ``to_commit`` (oldest
     first, ``reversed_order=False``). For each commit whose message is
     recognised as a bug fix, the result of
     ``get_commits_last_modified_lines`` is merged into the returned mapping.

     Returns:
         dict mapping each key returned by ``get_commits_last_modified_lines``
         (presumably a file path -- confirm against PyDriller) to the set of
         all commits collected for that key.
     """
     # The repository handle does not depend on the commit being inspected,
     # so it is created once instead of once per bug-fix commit.
     git_repo = GitRepository(path_to_repo)
     bug_introducing_commit_hashes = {}
     mining = RepositoryMining(path_to_repo=path_to_repo,
                               from_commit=from_commit,
                               to_commit=to_commit,
                               reversed_order=False)
     for commit in mining.traverse_commits():
         if not self.__is_message_bugfix(commit.msg):
             continue
         bug_introducing = git_repo.get_commits_last_modified_lines(commit)
         for file_path in bug_introducing:
             # Merge into the set already collected for this file, if any.
             bug_introducing_commit_hashes.setdefault(file_path, set()).update(
                 bug_introducing[file_path])
     return bug_introducing_commit_hashes
Ejemplo n.º 29
0
    def _drill(self):
        """Collect every commit of the configured repository.

        Returns:
            A tuple ``(commits, rep_obj)``. ``commits`` is a list holding one
            ``Commit`` wrapper per commit traversed by ``RepositoryMining``
            (called with ``since=config.START_DATE`` and
            ``to=config.END_DATE``); ``rep_obj`` is the ``GitRepository``
            created for ``config.REPO``.
        """
        cfg = self.config
        rep_obj = GitRepository(cfg.REPO)
        miner = RepositoryMining(cfg.REPO,
                                 since=cfg.START_DATE,
                                 to=cfg.END_DATE)
        commits = [Commit(raw_commit) for raw_commit in miner.traverse_commits()]
        return commits, rep_obj
Ejemplo n.º 30
0
def clone_repo(user, repo_name):
    """Clone ``user/repo_name`` from GitHub into ``tmp_dir`` and open it.

    The repository is cloned into the local path ``tmp_dir/<repo_name>``; an
    existing directory at that path is cleaned first.

    Args:
        user: GitHub account that owns the repository.
        repo_name: name of the repository to clone.

    Returns:
        A ``GitRepository`` for the cloned path, or ``None`` if any step
        raised an exception.
    """
    try:
        # NOTE(review): the credentials are hard-coded in source; they should
        # be moved to configuration / environment and rotated.
        url = "https://{}:{}@github.com/{}/{}".format(
            'gastige', 'Sistemi.Distribuiti.2', user, repo_name)
        # Log the target without the embedded credentials so the password
        # does not end up on stdout or in captured logs.
        print("git clone https://github.com/{}/{}".format(user, repo_name))
        repo_name = get_repo_name_from_url(url)
        path = tmp_dir/repo_name
        if os.path.exists(path):
            clean_tmp_dir(path)
        Repo.clone_from(url, path, progress=Progress())
        print("\nRepository {} clonata.".format(repo_name))
        return GitRepository(path)
    except Exception:
        # Any failure is reported to the caller as None, but
        # KeyboardInterrupt / SystemExit now propagate so a long clone can
        # still be interrupted.
        return None