def print_prs(project_flag):
    """
    Calculates the number of merged and closed pull requests, as well as
    the high frequency pull requesters, and prints to stdout.

    For every repo in the constants selected by the flag, this loads
    "<repo name>_pulls.json" from the configured pulls directory and prints
    the total, merged, open and closed (unmerged) counts together with the
    per-user tallies formatted by sort_by_val_return_str.

    Takes in a flag, defined by constants.py, that determines whether to
    look at stack repos or ml repos.
    """
    constants = return_constants(project_flag)
    repos = constants["repos"]
    directory = constants["pulls-fpath"]

    for repo_dict in repos:
        repo = repo_dict["name"]
        owner = repo_dict["user"]
        # A parenthesised single argument prints the same text whether
        # print is a statement (Python 2) or a function.
        print("{} {}".format(repo, owner))

        # grab pull requests; the context manager guarantees the handle is
        # closed, which a bare json.load(open(...)) does not
        pulls_path = os.path.join(directory, "{}_pulls.json".format(repo))
        with open(pulls_path) as pulls_file:
            pulls = json.load(pulls_file)
        print('num_pulls,{}'.format(len(pulls)))

        # sort
        merged = 0
        closed = 0
        open_pulls = 0
        merged_users = {}
        closed_users = {}
        for p in pulls:
            merged_at = p["merged_at"]
            closed_at = p["closed_at"]
            username = p["user"]["login"]

            # make an exception for systemml: for closed pull requests the
            # merged state is decided by is_merged_systemml instead of the
            # "merged_at" field
            if closed_at is not None and repo == systemml_name:
                merged_at = 1 if is_merged_systemml(p) else None

            # count
            if merged_at is not None:
                merged += 1
                merged_users[username] = merged_users.get(username, 0) + 1
            elif closed_at is not None:
                closed += 1
                closed_users[username] = closed_users.get(username, 0) + 1
            else:
                open_pulls += 1

        # sort all users who have had merged prs
        merged_users_str = sort_by_val_return_str(merged_users)
        # sort all users that have closed prs (unmerged)
        closed_users_str = sort_by_val_return_str(closed_users)

        # statistics for that repo
        print('num_merged,{}'.format(merged))
        print('merged_users,{}'.format(merged_users_str))
        print('num_open,{}'.format(open_pulls))
        print('num_closed,{}'.format(closed))
        print('closed_users,{}'.format(closed_users_str))
        print('')
def print_prs(project_flag):
    """
    Calculates the number of merged and closed pull requests, as well as
    the high frequency pull requesters, and prints to stdout.

    For every repo in the constants selected by the flag, this loads
    "<repo name>_pulls.json" from the configured pulls directory and prints
    the total, merged, open and closed (unmerged) counts together with the
    per-user tallies formatted by sort_by_val_return_str.

    Takes in a flag, defined by constants.py, that determines whether to
    look at stack repos or ml repos.
    """
    constants = return_constants(project_flag)
    repos = constants["repos"]
    directory = constants["pulls-fpath"]

    for repo_dict in repos:
        repo = repo_dict["name"]
        owner = repo_dict["user"]
        # A parenthesised single argument prints the same text whether
        # print is a statement (Python 2) or a function.
        print("{} {}".format(repo, owner))

        # grab pull requests; the context manager guarantees the handle is
        # closed, which a bare json.load(open(...)) does not
        pulls_path = os.path.join(directory, "{}_pulls.json".format(repo))
        with open(pulls_path) as pulls_file:
            pulls = json.load(pulls_file)
        print('num_pulls,{}'.format(len(pulls)))

        # sort
        merged = 0
        closed = 0
        open_pulls = 0
        merged_users = {}
        closed_users = {}
        for p in pulls:
            merged_at = p["merged_at"]
            closed_at = p["closed_at"]
            username = p["user"]["login"]

            # make an exception for systemml: for closed pull requests the
            # merged state is decided by is_merged_systemml instead of the
            # "merged_at" field
            if closed_at is not None and repo == systemml_name:
                merged_at = 1 if is_merged_systemml(p) else None

            # count
            if merged_at is not None:
                merged += 1
                merged_users[username] = merged_users.get(username, 0) + 1
            elif closed_at is not None:
                closed += 1
                closed_users[username] = closed_users.get(username, 0) + 1
            else:
                open_pulls += 1

        # sort all users who have had merged prs
        merged_users_str = sort_by_val_return_str(merged_users)
        # sort all users that have closed prs (unmerged)
        closed_users_str = sort_by_val_return_str(closed_users)

        # statistics for that repo
        print('num_merged,{}'.format(merged))
        print('merged_users,{}'.format(merged_users_str))
        print('num_open,{}'.format(open_pulls))
        print('num_closed,{}'.format(closed))
        print('closed_users,{}'.format(closed_users_str))
        print('')
def count(flag):
    """
    Writes and prints the commit count of every committer, repo by repo.

    Stores the constants selected by the flag in the module-level
    constants_dict. For each repo, the dictionary returned by
    count_commits_per_user is written as rows to
    "committer_csvs/<name>-<user>-dict.csv" and echoed to stdout as
    "key,value" lines under a "<name> - <user>" header.
    """
    global constants_dict
    constants_dict = return_constants(flag)

    for repo in constants_dict["repos"]:
        # dictionary of committer -> number of commits (as suggested by the
        # helper's name; its contents are written out verbatim)
        commit_dict = count_commits_per_user(return_filename(repo))

        repo_name = repo["name"]
        repo_user = repo["user"]
        output_file = "committer_csvs/{}-{}-dict.csv".format(
            repo_name, repo_user)

        with open(output_file, 'w') as csv_file:
            writer = csv.writer(csv_file)
            print("\n {} - {} \n".format(repo_name, repo_user))
            for committer, num_commits in commit_dict.items():
                writer.writerow([committer, num_commits])
                print(committer + "," + str(num_commits))
def get_pull_requests(project=ML):
    """
    Retrieves the pull requests of every repo in a project and saves them
    as JSON.

    First writes the retrieval date (CURRENT_DATE formatted with
    "%b %d, %Y") to "date.txt" in the pulls directory, then stores the
    result of get_pulls(repo) for each repo in "<repo name>_pulls.json" in
    that same directory.

    Takes in a flag that is passed to return_constants to select the repos
    and the output directory; defaults to ML.
    """
    constants = return_constants(project)
    repo_list = constants["repos"]
    output_dir = constants["pulls-fpath"]

    # record the date of data retrieval; "with" closes the file even if the
    # write raises
    with open(os.path.join(output_dir, "date.txt"), 'w') as date_file:
        date_file.write(CURRENT_DATE.strftime("%b %d, %Y"))

    for repo in repo_list:
        pull_req = get_pulls(repo)
        # A parenthesised single argument prints the same text whether
        # print is a statement (Python 2) or a function.
        print("writing data...")
        pulls_path = os.path.join(
            output_dir, "{}_pulls.json".format(repo["name"]))
        # the handle is released even when json.dump fails part-way
        with open(pulls_path, "w") as f:
            json.dump(pull_req, f)
        print("done writing data.")
def get_pull_requests(project=ML):
    """
    Retrieves the pull requests of every repo in a project and saves them
    as JSON.

    First writes the retrieval date (CURRENT_DATE formatted with
    "%b %d, %Y") to "date.txt" in the pulls directory, then stores the
    result of get_pulls(repo) for each repo in "<repo name>_pulls.json" in
    that same directory.

    Takes in a flag that is passed to return_constants to select the repos
    and the output directory; defaults to ML.
    """
    constants = return_constants(project)
    repo_list = constants["repos"]
    output_dir = constants["pulls-fpath"]

    # record the date of data retrieval; "with" closes the file even if the
    # write raises
    with open(os.path.join(output_dir, "date.txt"), 'w') as date_file:
        date_file.write(CURRENT_DATE.strftime("%b %d, %Y"))

    for repo in repo_list:
        pull_req = get_pulls(repo)
        # A parenthesised single argument prints the same text whether
        # print is a statement (Python 2) or a function.
        print("writing data...")
        pulls_path = os.path.join(
            output_dir, "{}_pulls.json".format(repo["name"]))
        # the handle is released even when json.dump fails part-way
        with open(pulls_path, "w") as f:
            json.dump(pull_req, f)
        print("done writing data.")
def herf(flag): global constants_dict constants_dict = return_constants(flag) import doctest doctest.testmod() print 'Overall Herfindahl Indices' for item in sorted(calculateOverall().items()): print "{:<30}\t{}".format(item[0], item[1]) print '\nPer Year' hiSortedByYear = sorted(calculateYear().items()) index = 0 sortedYears = sorted(hiSortedByYear[0][1].keys()) print "{:<12}".format("Year"), for item in hiSortedByYear: print "{:<30}".format(item[0]), while(index < len(sortedYears)): currYear = sortedYears[index] print "\n{:<12}".format(currYear), for item in hiSortedByYear: print "{:<30}".format(item[1][currYear]), index+=1 print '' print '\nPer Month' hiSortedByMonth = sorted(calculateMonth().items()) index = 0 sortedMonths = sorted(hiSortedByMonth[0][1].keys()) print "{:<12}".format("Month"), for item in hiSortedByMonth: print "{:<30}".format(item[0]), while(index < len(sortedMonths)): currMonth = sortedMonths[index] print "\n{} {:<7}".format(currMonth/100, currMonth%100), for item in hiSortedByMonth: print "{:<30}".format(item[1][currMonth]), index+=1
def herf(flag): global constants_dict constants_dict = return_constants(flag) import doctest doctest.testmod() print 'Overall Herfindahl Indices' for item in sorted(calculateOverall().items()): print "{:<30}\t{}".format(item[0], item[1]) print '\nPer Year' hiSortedByYear = sorted(calculateYear().items()) index = 0 sortedYears = sorted(hiSortedByYear[0][1].keys()) print "{:<12}".format("Year"), for item in hiSortedByYear: print "{:<30}".format(item[0]), while (index < len(sortedYears)): currYear = sortedYears[index] print "\n{:<12}".format(currYear), for item in hiSortedByYear: print "{:<30}".format(item[1][currYear]), index += 1 print '' print '\nPer Month' hiSortedByMonth = sorted(calculateMonth().items()) index = 0 sortedMonths = sorted(hiSortedByMonth[0][1].keys()) print "{:<12}".format("Month"), for item in hiSortedByMonth: print "{:<30}".format(item[0]), while (index < len(sortedMonths)): currMonth = sortedMonths[index] print "\n{} {:<7}".format(currMonth / 100, currMonth % 100), for item in hiSortedByMonth: print "{:<30}".format(item[1][currMonth]), index += 1
def get_commits(projects=ML):
    """
    Retrieves the commits of every repo in a project and saves them as JSON.

    First writes the retrieval date (CURRENT_DATE formatted with
    "%b %d, %Y") to "date.txt" in the commits directory, then stores the
    result of get_all_commits(repo_data) for each repo in the file named by
    return_filename(repo_data) inside that same directory.

    Takes in a flag that is passed to return_constants to select the repos
    and the output directory; defaults to ML.
    """
    constants = return_constants(projects)  # change this if necessary

    # directory
    dirName = constants["commits-fpath"]

    # getting correct repos
    repo_list = constants['repos']

    # record the date of data retrieval; "with" closes the file even if the
    # write raises
    with open(os.path.join(dirName, "date.txt"), 'w') as date_file:
        date_file.write(CURRENT_DATE.strftime("%b %d, %Y"))

    # getting commits
    for repo_data in repo_list:
        commits = get_all_commits(repo_data)
        # A parenthesised single argument prints the same text whether
        # print is a statement (Python 2) or a function.
        print("writing data...")
        commits_path = os.path.join(dirName, return_filename(repo_data))
        # the handle is released even when json.dump fails part-way
        with open(commits_path, 'w') as output_file:
            json.dump(commits, output_file)
        print("done writing data.")
Currently not going to extend to stack since this code is not in use. """ import repo_info import json import sys, os sys.path.insert(0, os.path.abspath(os.path.join('..'))) from Config.constants import ML, STACK, return_constants dir = 'mlcontrib/' # names = open(dir+"files.txt", 'r').read() # mlFileNames = names.split() # print mlFileNames constants = return_constants(ML) reponames = constants["repos"] #matrix of projects (rows) and contributors (columns) matrix = [] #collaborator name : index in matrix allContributors = {} #(project name, projectOwner) : index in matrix projects = {} #parentrepo : [list of repos forked from parent] forkFamilyTree = {} ###