def _commits_and_issues(repo, issues):
    """Build a commit record for every commit reported for ``repo``.

    Commits come from ``DataExtractor._get_commits_files(repo)``, which is
    used here as a mapping from a git commit object to the files it touched.
    Each commit is turned into a record with
    ``Commit.init_commit_by_git_commit``.

    Args:
        repo: Repository handle, passed through to
            ``DataExtractor._get_commits_files``.
        issues: Mapping from issue id (string) to issue object. Only
            all-digit words of the commit summary are looked up in it.

    Returns:
        A list of the objects returned by ``Commit.init_commit_by_git_commit``.
        Commits without an issue reference carry the bug id ``"0"``.
    """

    def replace(chars_to_replace, replacement, s):
        """Return ``s`` with each character of ``chars_to_replace`` replaced."""
        temp_s = s
        for c in chars_to_replace:
            temp_s = temp_s.replace(c, replacement)
        return temp_s

    def get_bug_num_from_comit_text(commit_text, issues_ids):
        """Return the first all-digit word found in ``issues_ids``, else "0"."""
        text = replace("[]?#,:(){}", "", commit_text.lower())
        text = replace("-_", " ", text)
        for word in text.split():
            if word.isdigit() and word in issues_ids:
                return word
        return "0"

    commits = []
    java_commits = DataExtractor._get_commits_files(repo)
    for git_commit in java_commits:
        committed_files = java_commits[git_commit]
        if not any(f.is_java for f in committed_files):
            # No Java file touched: record the commit without an issue and
            # pass the trailing False flag, as before.
            commits.append(
                Commit.init_commit_by_git_commit(
                    git_commit, "0", None, committed_files, False))
            continue
        try:
            commit_text = DataExtractor._clean_commit_message(
                git_commit.summary)
        except Exception:
            # Best effort: a commit whose summary cannot be cleaned is
            # skipped. ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            continue
        # The mapping itself is used for membership: same result as testing
        # against ``issues.keys()`` but without a list scan per word.
        bug_id = get_bug_num_from_comit_text(commit_text, issues)
        commits.append(
            Commit.init_commit_by_git_commit(
                git_commit, bug_id, issues.get(bug_id), committed_files))
    return commits
def _commits_and_issues(project, repo, jira_issues):
    """Pair every commit reported for ``repo`` with the issue it mentions.

    Commits come from ``DataExtractor._get_commits_files(project, repo)``,
    used here as a mapping from commit sha to the files of that commit.

    Args:
        project: Passed through to ``DataExtractor._get_commits_files``.
        repo: Repository object; must provide ``commit(sha)``.
        jira_issues: Iterable of issue objects exposing ``issue_id`` and
            ``creation_time``.

    Returns:
        A list of the objects returned by ``Commit.init_commit_by_git_commit``.
        Commits without a matching issue carry the bug id ``"0"``.
    """
    issues = dict((issue.issue_id, issue) for issue in jira_issues)
    # (issue id, creation time) pairs, most recently created first.
    issues_dates = sorted(
        [(key, issues[key].creation_time) for key in issues],
        key=lambda pair: pair[1],
        reverse=True)

    def replace(chars_to_replace, replacement, s):
        """Return ``s`` with each character of ``chars_to_replace`` replaced."""
        result = s
        for char in chars_to_replace:
            result = result.replace(char, replacement)
        return result

    def get_bug_num_from_comit_text(commit_text, issues_ids):
        """Return the first all-digit word found in ``issues_ids``, else "0"."""
        lowered = commit_text.lower()
        without_punctuation = replace("[]?#,:(){}'\"", "", lowered)
        spaced = replace("-_.=", " ", without_punctuation)
        # Drop the words 'bug' / 'fix' so that e.g. "bug123" yields "123".
        spaced = spaced.replace('bug', '').replace('fix', '')
        for token in spaced.split():
            if token.isdigit() and token in issues_ids:
                return token
        return "0"

    commits = []
    java_commits = DataExtractor._get_commits_files(project, repo)
    for commit_sha in java_commits:
        git_commit = repo.commit(commit_sha)
        changed_files = java_commits[commit_sha]

        if not any(changed.is_java for changed in changed_files):
            # Nothing Java-related: keep the commit, unlinked, with the
            # trailing False flag.
            commits.append(
                Commit.init_commit_by_git_commit(
                    git_commit, "0", None, changed_files, False))
            continue

        try:
            commit_text = DataExtractor._clean_commit_message(
                git_commit.message)
        except Exception:
            continue

        # Find the first (newest) issue created before this commit. If none
        # qualifies, the index stays on the last entry, so one issue is
        # always kept when the list is non-empty.
        keep_from = 0
        for keep_from, (_, created) in enumerate(issues_dates):
            naive_created = (
                created.replace(tzinfo=None) if created.tzinfo else created)
            if git_commit.committed_datetime.replace(tzinfo=None) > naive_created:
                break
        # The trim is permanent: dropped issues are not considered for any
        # later commit. NOTE(review): presumably relies on commits being
        # visited newest first - confirm against _get_commits_files.
        issues_dates = issues_dates[keep_from:]

        candidate_ids = set(issue_key for issue_key, _ in issues_dates)
        bug_id = get_bug_num_from_comit_text(commit_text, candidate_ids)
        commits.append(
            Commit.init_commit_by_git_commit(
                git_commit, bug_id, issues.get(bug_id), changed_files))
    return commits
def commits_and_issues(repo, issues):
    """Build a commit record for every commit of ``repo``.

    Each commit summary is cleaned with ``clean_commit_message`` and scanned
    for the numeric part of one of the given issue keys.

    Args:
        repo: Object providing ``iter_commits()``; each yielded commit must
            expose ``summary``.
        issues: Iterable of issue keys containing a ``-``; the part after the
            first ``-`` is used as the issue number (e.g. ``"KEY-12"`` gives
            ``"12"``).

    Returns:
        A list of the objects returned by
        ``Commit.init_commit_by_git_commit(git_commit, bug_id)``, where
        ``bug_id`` is the matched issue number or ``"0"``.
    """

    def replace(chars_to_replace, replacement, s):
        """Return ``s`` with each character of ``chars_to_replace`` replaced."""
        temp_s = s
        for c in chars_to_replace:
            temp_s = temp_s.replace(c, replacement)
        return temp_s

    def get_bug_num_from_comit_text(commit_text, issues_ids):
        """Return the first all-digit word found in ``issues_ids``, else "0"."""
        text = replace("[]?#,:(){}", "", commit_text.lower())
        text = replace("-_", " ", text)
        for word in text.split():
            if word.isdigit() and word in issues_ids:
                return word
        return "0"

    # Materialise the ids once. A lazy ``map`` object (Python 3) would be
    # exhausted by the first membership test, so no later commit could match
    # an issue; a set also makes each lookup constant time.
    issues_ids = set(issue.split("-")[1] for issue in issues)

    commits = []
    for git_commit in repo.iter_commits():
        commit_text = clean_commit_message(git_commit.summary)
        bug_id = get_bug_num_from_comit_text(commit_text, issues_ids)
        commits.append(Commit.init_commit_by_git_commit(git_commit, bug_id))
    return commits
def __init__(self, git_tag, _files):
    """Initialise the version from a git tag.

    Args:
        git_tag: Tag object exposing ``name`` and ``commit``; the commit's
            ``repo.git`` is used to run ``git ls-tree``.
        _files: Stored unchanged as ``committed_files``.
    """
    self._commit = Commit.init_commit_by_git_commit(git_tag.commit, files=[git_tag.name])
    self._name = os.path.normpath(git_tag.name)
    self.committed_files = _files
    listing = git_tag.commit.repo.git.ls_tree("-r", "--name-only", git_tag.name)
    # ``--name-only`` prints one path per line, so split on line breaks:
    # splitting on any whitespace would break paths that contain spaces
    # into fragments.
    self.files = set(
        path for path in listing.splitlines() if path.endswith(".java"))
    self.version_files = None
tag.tag._commit._commit_date).strftime("%Y-%m-%d") ]) def main(out_file, gitPath, jira_url, jira_project_name): commits = get_data(jira_project_name, jira_url, gitPath) with open(out_file, "wb") as f: writer = csv.writer(f) writer.writerows([c.to_list() for c in commits]) if __name__ == "__main__": repo = git.Repo(r"c:\temp\tika") versions = get_repo_versions(r"c:\temp\tika") tags_commits = get_commits_between_versions( map(lambda c: Commit.init_commit_by_git_commit(c, 0), list(repo.iter_commits())[:1000]), versions) tags = [] for tag in tags_commits: tags.append(VersionInfo(tag, tags_commits[tag])) import apache_repos from caching import REPOSIROTY_DATA_DIR VERSIONS = os.path.join(REPOSIROTY_DATA_DIR, r"apache_versions") repos_and_jira = apache_repos.get_apache_repos_data() for repo, jira_key in repos_and_jira: if not os.path.exists(repo): print "start git clone https://github.com/apache/{0}.git".format( os.path.basename(repo)) continue try: if os.path.exists(os.path.join(VERSIONS, jira_key) + ".csv"):
def __init__(self, repo, git_tag):
    """Initialise the version from ``git_tag`` inside ``repo``.

    Args:
        repo: Repository object exposing ``git_dir``; also forwarded to
            ``version_files``.
        git_tag: Tag object exposing ``name`` and ``commit``.
    """
    self._commit = Commit.init_commit_by_git_commit(git_tag.commit, 0)
    self._name = git_tag.name
    # Label is "<name of the directory containing git_dir>_<tag name>".
    repo_root = os.path.dirname(repo.git_dir)
    label = "{0}_{1}".format(os.path.basename(repo_root), git_tag.name)
    self.version_files = version_files(label, git_tag, repo)