def mine_methods(self, start_commit: str, stop_commit: str, filter_methods: Set[str] = None, filter_files: Set[str] = None) -> int:
    """Mine per-method metrics for a commit range and write them to the CSV file.

    The commits between ``start_commit`` and ``stop_commit`` are traversed
    with ``reversed_order=True``. For every modification whose filename ends
    with one of ``self.allowed_extensions``, one ``MetricsBean`` is built per
    method in ``mod.methods`` and stored under the key returned by
    ``self.get_unique_key``. Beans of methods belonging to an ADD
    modification are handed to ``self.flush_methods`` right away; whatever is
    still held when the traversal ends is written with
    ``saver.add_method_to_csv``.

    Args:
        start_commit: Hash passed as ``from_commit``. When ``None``, the hash
            of the repository head is used instead.
        stop_commit: Hash passed as ``to_commit``.
        filter_methods: When given, only methods whose unique key is in this
            set are measured.
        filter_files: When given, only modifications whose ``new_path`` is in
            this set are analyzed. This set is mutated in place: the old path
            of every analyzed RENAME modification is added to it.

    Returns:
        The number of commits traversed.
    """
    methods = {}  # Dict[str, List[MetricsBean]]
    print('Mining: ' + self.repo_path)
    gr = GitRepository(self.repo_path)

    # Redefine start and stop commits
    print('Adjust start and stop commits.')
    first_commit = gr.get_head().hash if start_commit is None else start_commit
    last_commit = stop_commit

    # Print start and stop commits info
    c1 = gr.get_commit(first_commit)
    print('Start: {} Author date: {} Committer date: {}'.format(
        c1.hash, c1.author_date, c1.committer_date))
    c2 = gr.get_commit(last_commit)
    print('Stop: {} Author date: {} Committer date: {}'.format(
        c2.hash, c2.author_date, c2.committer_date))

    # Unnecessary in production
    # Count commits to analyze. Only the count is needed for the progress
    # output, so the commit objects themselves are not retained.
    print('Retrieve commits to analyze.')
    commits_to_analyze = 0
    for commit in RepositoryMining(self.repo_path,
                                   from_commit=first_commit,
                                   to_commit=last_commit,
                                   reversed_order=True).traverse_commits():
        commits_to_analyze += 1
        print('{}) {} {}'.format(commits_to_analyze, commit.hash, commit.author_date))

    # str.endswith needs a tuple; build it once instead of per modification.
    allowed_extensions = tuple(self.allowed_extensions)

    # Open CSV file and write header
    saver = Saver(self.csv_file)
    saver.create_csv_file()
    commit_count = 0
    try:
        saver.print_csv_header()

        # Traverse commits and calculate metrics
        for commit in RepositoryMining(self.repo_path,
                                       from_commit=first_commit,
                                       to_commit=last_commit,
                                       reversed_order=True).traverse_commits():
            buggy = commit.hash in self.bic_commits
            fix = commit.hash in self.fix_commits
            mod_analyzed_count = 0
            count_files_per_commit = len(commit.modifications)

            for mod in commit.modifications:
                # Filter out unnecessary files
                if filter_files is not None and mod.new_path not in filter_files:
                    continue
                if not mod.filename.endswith(allowed_extensions):
                    continue
                mod_analyzed_count += 1

                # Update key entry on rename
                if mod.change_type is ModificationType.RENAME:
                    methods = self.update_keys(methods, mod.new_path, mod.old_path)
                    if filter_files is not None:
                        # Keep following the file under its previous path.
                        filter_files.add(mod.old_path)

                count_methods_per_file = len(mod.methods)
                for method in mod.methods:
                    key = self.get_unique_key(mod.new_path, mod.old_path, method.name)
                    # For unwanted keys prevent metric calculation
                    if filter_methods is not None and key not in filter_methods:
                        continue

                    method_metrics = MethodMetrics(
                        mod.source_code, method.start_line, method.end_line,
                        mod.diff_parsed, buggy, fix)
                    m_touched = method_metrics.is_touched()
                    m_fix = method_metrics.is_fix()
                    m_buggy = method_metrics.is_buggy()
                    mb = MetricsBean(
                        commit.hash, commit.author_date, mod.new_path,
                        method.name, method.start_line, mod.change_type.name,
                        count_files_per_commit, mod.added, mod.removed,
                        mod.nloc, mod.complexity, mod.token_count,
                        count_methods_per_file,
                        method_metrics.get_added_lines(),
                        method_metrics.get_removed_lines(), method.nloc,
                        method.complexity, method.token_count, buggy, fix,
                        method_metrics.get_number_of_lines(), method.fan_in,
                        method.fan_out, method.general_fan_out,
                        len(method.parameters), commit.author.email,
                        m_touched, m_fix, m_buggy)

                    # Append new bean
                    methods.setdefault(key, []).append(mb)

                    # Going back in the past, ADD is the moment in which a
                    # file, and consequently a method, is added; its beans
                    # can therefore be flushed into the CSV to save RAM.
                    # NOTE(review): the original indentation was lost; the
                    # flush is assumed to sit inside the filter_methods
                    # check, where the key is guaranteed to exist - confirm.
                    if mod.change_type is ModificationType.ADD:
                        self.flush_methods(methods, key, saver)

            commit_count += 1
            print(
                'Methods: {:>8} | Commit {:>6}/{:<6} {} Date: {} Mods: {:>4}/{:<4} | Bug: {} Fix: {}'
                .format(len(methods), commit_count, commits_to_analyze,
                        commit.hash, commit.author_date.strftime('%d/%m/%Y'),
                        count_files_per_commit, mod_analyzed_count, buggy, fix))

        # Write out everything that was not flushed during the traversal.
        for key, value in methods.items():
            saver.add_method_to_csv(key, value)
    finally:
        # Close the CSV even when mining fails part-way through.
        saver.close_csv_file()

    print('Mining ended')
    return commit_count
def mine(self, start_date: datetime, stop_date: datetime) -> int:
    """Mine per-file metrics for a date range and write them to the CSV file.

    Commits selected with ``since=start_date`` and ``to=stop_date`` are
    traversed. For every modification whose filename ends with one of
    ``self.allowed_extensions``, a ``MetricBean`` is built and stored in a
    ``MyMetricBeans`` collection keyed by file path. The bean list of a file
    is flushed through the saver when a DELETE modification is seen, and all
    remaining keys are flushed once the traversal has finished. Added lines
    are also summed per author e-mail and handed to ``saver.flush``.

    Args:
        start_date: Lower bound, passed to ``RepositoryMining`` as ``since``.
        stop_date: Upper bound, passed to ``RepositoryMining`` as ``to``.

    Returns:
        The number of commits traversed.
    """
    beans = MyMetricBeans()
    developers = {}  # author e-mail -> sum of mod.added
    my_commits = MyCommits()
    print('Mining: ' + self.repo_path)
    # The instance is not used below; the constructor call is kept in case it
    # validates the repository path - TODO confirm and drop if it does not.
    GitRepository(self.repo_path)

    # Unnecessary in production
    # Count commits to analyze. Only the count is needed for the progress
    # output, so the commit objects themselves are not retained.
    print('Retrieve commits to analyze.')
    commits_to_analyze = 0
    for commit in RepositoryMining(self.repo_path, since=start_date,
                                   to=stop_date).traverse_commits():
        commits_to_analyze += 1
        print('{}) {} {}'.format(commits_to_analyze, commit.hash, commit.author_date))

    # str.endswith needs a tuple; build it once instead of per modification.
    allowed_extensions = tuple(self.allowed_extensions)
    # The per-file method count is not computed in this miner; -1 is stored
    # in every bean as a placeholder.
    count_methods_per_file = -1

    # Open CSV file and write header
    saver = Saver(self.csv_file, self.repo_path)
    saver.create_csv_file()
    commit_count = 0
    try:
        saver.print_csv_header()

        # Traverse commits and calculate metrics
        for commit in RepositoryMining(self.repo_path, since=start_date,
                                       to=stop_date).traverse_commits():
            buggy = commit.hash in self.bic_commits
            fix = commit.hash in self.fix_commits
            mod_analyzed_count = 0
            count_files_per_commit = len(commit.modifications)

            for mod in commit.modifications:
                # Filter out unnecessary files
                if not mod.filename.endswith(allowed_extensions):
                    continue

                package = self.get_package(mod.new_path, mod.old_path, mod.filename)
                my_commits.append(commit.hash, mod.new_path, mod.old_path)
                mb = MetricBean(
                    commit.hash, mod.filename, mod.new_path, mod.old_path,
                    package, mod.change_type.name, count_files_per_commit,
                    mod.added, mod.removed, mod.nloc, mod.complexity,
                    mod.token_count, buggy, fix, commit.author_date,
                    commit.author.email, commit.committer_date,
                    commit.committer.email, count_methods_per_file)
                mod_analyzed_count += 1

                change = mod.change_type
                if change is ModificationType.DELETE:
                    # Record the deletion, flush the file history and drop the key
                    key = mod.old_path
                    beans.add(key, mb)
                    commit_list_of_file = beans.get(key)
                    saver.flush(commit_list_of_file, beans, developers, my_commits)
                    beans.remove(key)
                elif change is ModificationType.RENAME:
                    # Record under the old key, then move the history to the new key
                    beans.add(mod.old_path, mb)
                    beans.update_key(mod.old_path, mod.new_path)
                else:
                    # ADD, COPY and every other change type: store under the new path
                    beans.add(mod.new_path, mb)
                    if change is ModificationType.COPY:
                        print("CASE COPY: {} {}".format(
                            mod.new_path, mod.old_path))

                # NOTE(review): the original indentation was lost; this
                # update is assumed to run for every analyzed modification
                # (not only in the final else branch) - confirm.
                author_email = commit.author.email
                developers[author_email] = developers.get(author_email, 0) + mod.added

            commit_count += 1
            print(
                'Files: {:>8} | Commit {:>6}/{:<6} {} Date: {} Mods: {:>4}/{:<4} | Bug: {} Fix: {}'
                .format(beans.get_count(), commit_count, commits_to_analyze,
                        commit.hash, commit.author_date.strftime('%d/%m/%Y'),
                        count_files_per_commit, mod_analyzed_count, buggy, fix))

        # Flush everything else
        print("Save metrics, it's require time!")
        key_number = len(beans.get_keys())
        for count, key in enumerate(beans.get_keys(), start=1):
            print("Save: {}/{}".format(count, key_number))
            saver.flush(beans.get(key), beans, developers, my_commits)
    finally:
        # Close the CSV even when mining fails part-way through.
        saver.close_csv_file()

    print('Mining ended')
    return commit_count