def mine_methods(self,
                     start_commit: str,
                     stop_commit: str,
                     filter_methods: Set[str] = None,
                     filter_files: Set[str] = None) -> int:
        """Mine per-method metrics for a commit range and write them to CSV.

        Commits are traversed with ``reversed_order=True`` from
        ``start_commit`` to ``stop_commit``. For every modification whose
        filename ends with one of ``self.allowed_extensions``, one
        ``MetricsBean`` is built per method in ``mod.methods`` and grouped
        under the key returned by ``self.get_unique_key``. When a
        modification is an ``ADD`` the beans of that key are handed to
        ``self.flush_methods``; whatever is still held at the end is written
        with ``saver.add_method_to_csv``.

        Args:
            start_commit: Hash passed as ``from_commit``. When ``None`` the
                hash of the repository head is used instead.
            stop_commit: Hash passed as ``to_commit``.
            filter_methods: When not ``None``, only method keys contained in
                this set are analysed.
            filter_files: When not ``None``, only modifications whose
                ``new_path`` is in this set are analysed. NOTE: the set is
                mutated in place -- the ``old_path`` of every renamed file
                that passes the filters is added to it.

        Returns:
            The number of commits traversed.
        """
        methods = {}  # Dict[str, List[MetricsBean]]
        print('Mining: ' + self.repo_path)
        gr = GitRepository(self.repo_path)

        # Redefine start and stop commits
        print('Adjust start and stop commits.')
        first_commit = start_commit
        if start_commit is None:
            first_commit = gr.get_head().hash
        last_commit = stop_commit

        # Print start and stop commits info
        c1 = gr.get_commit(first_commit)
        print('Start: {} Author date: {} Committer date: {}'.format(
            c1.hash, c1.author_date, c1.committer_date))
        c2 = gr.get_commit(last_commit)
        print('Stop:  {} Author date: {} Committer date: {}'.format(
            c2.hash, c2.author_date, c2.committer_date))

        # Unnecessary in production
        # Count commits to analyze. Only the total is needed for the progress
        # line below, so the commit objects are not retained.
        print('Retrieve commits to analyze.')
        commits_to_analyze = 0
        for commits_to_analyze, commit in enumerate(
                RepositoryMining(self.repo_path,
                                 from_commit=first_commit,
                                 to_commit=last_commit,
                                 reversed_order=True).traverse_commits(),
                start=1):
            print('{}) {} {}'.format(commits_to_analyze, commit.hash,
                                     commit.author_date))

        # str.endswith needs a tuple; build it once instead of per file.
        allowed_extensions = tuple(self.allowed_extensions)

        # Open CSV file and write header
        saver = Saver(self.csv_file)
        saver.create_csv_file()
        commit_count = 0
        try:
            saver.print_csv_header()

            # Traverse commits and calculate metrics
            # NOTE: RepositoryMining's only_modifications_with_file_types
            # option was tried here by the original author and left disabled.
            for commit in RepositoryMining(
                    self.repo_path,
                    from_commit=first_commit,
                    to_commit=last_commit,
                    reversed_order=True).traverse_commits():
                buggy = commit.hash in self.bic_commits
                fix = commit.hash in self.fix_commits
                mod_analyzed_count = 0
                count_files_per_commit = len(commit.modifications)
                for mod in commit.modifications:
                    # Filter out unnecessary files
                    if (filter_files is not None
                            and mod.new_path not in filter_files):
                        continue
                    if not mod.filename.endswith(allowed_extensions):
                        continue
                    mod_analyzed_count += 1
                    # Update key entry on rename
                    if mod.change_type is ModificationType.RENAME:
                        methods = self.update_keys(methods, mod.new_path,
                                                   mod.old_path)
                        if filter_files is not None:
                            # Keep following the file under its older name.
                            filter_files.add(mod.old_path)
                    count_methods_per_file = len(mod.methods)
                    for method in mod.methods:
                        key = self.get_unique_key(mod.new_path, mod.old_path,
                                                  method.name)
                        # For unwanted keys prevent metric calculation
                        if (filter_methods is not None
                                and key not in filter_methods):
                            continue
                        lines = mod.diff_parsed
                        method_metrics = MethodMetrics(
                            mod.source_code, method.start_line,
                            method.end_line, lines, buggy, fix)
                        m_touched = method_metrics.is_touched()
                        m_fix = method_metrics.is_fix()
                        m_buggy = method_metrics.is_buggy()
                        mb = MetricsBean(
                            commit.hash, commit.author_date,
                            mod.new_path, method.name,
                            method.start_line, mod.change_type.name,
                            count_files_per_commit, mod.added,
                            mod.removed, mod.nloc, mod.complexity,
                            mod.token_count, count_methods_per_file,
                            method_metrics.get_added_lines(),
                            method_metrics.get_removed_lines(),
                            method.nloc, method.complexity,
                            method.token_count, buggy, fix,
                            method_metrics.get_number_of_lines(),
                            method.fan_in, method.fan_out,
                            method.general_fan_out,
                            len(method.parameters),
                            commit.author.email, m_touched, m_fix,
                            m_buggy)
                        # Append new bean
                        methods.setdefault(key, []).append(mb)
                        # Going back in the past, ADD is the moment in which
                        # a file (and consequently its methods) was created,
                        # so the key can be flushed into the CSV to save RAM.
                        if mod.change_type is ModificationType.ADD:
                            self.flush_methods(methods, key, saver)
                commit_count += 1
                print(
                    'Methods: {:>8} | Commit {:>6}/{:<6} {} Date: {} Mods: {:>4}/{:<4} | Bug: {} Fix: {}'
                    .format(len(methods), commit_count, commits_to_analyze,
                            commit.hash,
                            commit.author_date.strftime('%d/%m/%Y'),
                            count_files_per_commit, mod_analyzed_count,
                            buggy, fix))
            # Write every method that was never flushed during the traversal.
            for key, value in methods.items():
                saver.add_method_to_csv(key, value)
        finally:
            # Close the CSV even when mining fails half-way through.
            saver.close_csv_file()
        print('Mining ended')
        return commit_count
    # Example no. 2
    # 0
    def mine(self, start_date: datetime, stop_date: datetime) -> int:
        """Mine per-file metrics for a date range and write them to CSV.

        Commits between ``start_date`` and ``stop_date`` are traversed. For
        every modification whose filename ends with one of
        ``self.allowed_extensions`` a ``MetricBean`` is created and stored in
        a ``MyMetricBeans`` collection keyed by file path: deleted files are
        flushed to the CSV and removed, renamed files have their key updated,
        and everything else is stored under ``new_path``. Lines added are
        accumulated per author e-mail. Whatever is still stored at the end is
        flushed through ``saver.flush``.

        Args:
            start_date: Passed to ``RepositoryMining`` as ``since``.
            stop_date: Passed to ``RepositoryMining`` as ``to``.

        Returns:
            The number of commits traversed.
        """
        beans = MyMetricBeans()
        developers = {}  # author e-mail -> total added lines seen so far
        my_commits = MyCommits()
        print('Mining: ' + self.repo_path)
        # NOTE(review): the instance is not used below; the call is kept in
        # case construction validates the repository path -- confirm before
        # removing it.
        GitRepository(self.repo_path)

        # Unnecessary in production
        # Count commits to analyze. Only the total is needed for the progress
        # line below, so the commit objects are not retained.
        print('Retrieve commits to analyze.')
        commits_to_analyze = 0
        for commits_to_analyze, commit in enumerate(
                RepositoryMining(self.repo_path,
                                 since=start_date,
                                 to=stop_date).traverse_commits(),
                start=1):
            print('{}) {} {}'.format(commits_to_analyze, commit.hash,
                                     commit.author_date))

        # str.endswith needs a tuple; build it once instead of per file.
        allowed_extensions = tuple(self.allowed_extensions)
        # Methods are not counted at file level; -1 is stored as placeholder.
        count_methods_per_file = -1

        # Open CSV file and write header
        saver = Saver(self.csv_file, self.repo_path)
        saver.create_csv_file()
        commit_count = 0
        try:
            saver.print_csv_header()

            # Traverse commits and calculate metrics
            # NOTE: RepositoryMining's only_modifications_with_file_types
            # option was tried here by the original author and left disabled.
            for commit in RepositoryMining(self.repo_path,
                                           since=start_date,
                                           to=stop_date).traverse_commits():
                buggy = commit.hash in self.bic_commits
                fix = commit.hash in self.fix_commits
                mod_analyzed_count = 0
                count_files_per_commit = len(commit.modifications)
                author_email = commit.author.email
                for mod in commit.modifications:
                    # Filter out unnecessary files
                    if not mod.filename.endswith(allowed_extensions):
                        continue
                    package = self.get_package(mod.new_path, mod.old_path,
                                               mod.filename)
                    my_commits.append(commit.hash, mod.new_path, mod.old_path)
                    mb = MetricBean(
                        commit.hash, mod.filename, mod.new_path, mod.old_path,
                        package, mod.change_type.name, count_files_per_commit,
                        mod.added, mod.removed, mod.nloc, mod.complexity,
                        mod.token_count, buggy, fix, commit.author_date,
                        author_email, commit.committer_date,
                        commit.committer.email, count_methods_per_file)
                    mod_analyzed_count += 1

                    change = mod.change_type
                    if change is ModificationType.DELETE:
                        # Flush in file and remove key
                        key = mod.old_path
                        beans.add(key, mb)
                        saver.flush(beans.get(key), beans, developers,
                                    my_commits)
                        beans.remove(key)
                    elif change is ModificationType.RENAME:
                        # Record under the old key, then move to the new one
                        beans.add(mod.old_path, mb)
                        beans.update_key(mod.old_path, mod.new_path)
                    else:
                        # ADD, COPY and plain changes all store the bean
                        # under the current path.
                        beans.add(mod.new_path, mb)
                        if change is ModificationType.COPY:
                            print("CASE COPY: {} {}".format(
                                mod.new_path, mod.old_path))

                    developers[author_email] = (
                        developers.get(author_email, 0) + mod.added)
                commit_count += 1
                print(
                    'Files: {:>8} | Commit {:>6}/{:<6} {} Date: {} Mods: {:>4}/{:<4} | Bug: {} Fix: {}'
                    .format(beans.get_count(), commit_count,
                            commits_to_analyze, commit.hash,
                            commit.author_date.strftime('%d/%m/%Y'),
                            count_files_per_commit, mod_analyzed_count,
                            buggy, fix))

            # Flush everything else
            print("Save metrics, it's require time!")
            # Snapshot the keys once: saver.flush receives ``beans`` itself,
            # so iterating a live view of it would be fragile.
            remaining_keys = list(beans.get_keys())
            key_number = len(remaining_keys)
            for count, key in enumerate(remaining_keys, start=1):
                print("Save: {}/{}".format(count, key_number))
                saver.flush(beans.get(key), beans, developers, my_commits)
        finally:
            # Close the CSV even when mining fails half-way through.
            saver.close_csv_file()
        print('Mining ended')
        return commit_count