Esempio n. 1
0
def print_prs(project_flag):
    """
    Calculates the number of merged and closed pull requests, as well as the high
    frequency pull requesters, and prints to stdout. Takes in a flag, defined by
    constants.py, that determines whether to look at stack repos or ml repos.
    """
    constants = return_constants(project_flag)
    repos = constants["repos"]
    directory = constants["pulls-fpath"]
    for repo_dict in repos:
        repo = repo_dict["name"]
        owner = repo_dict["user"]
        print repo, owner

        # grab pull requests
        pulls = json.load(
            open(os.path.join(directory, "{}_pulls.json".format(repo))))
        print 'num_pulls,{}'.format(len(pulls))

        # sort
        merged = 0
        closed = 0
        open_pulls = 0
        merged_users = {}
        closed_users = {}
        for p in pulls:
            merged_at = p["merged_at"]
            closed_at = p["closed_at"]
            username = p["user"]["login"]
            # make an exception for systemml
            if closed_at != None and repo == systemml_name:
                merged_at = 1 if is_merged_systemml(p) else None

            # count
            if merged_at != None:
                merged += 1
                merged_users[username] = merged_users.get(username, 0) + 1
            elif closed_at != None:
                closed += 1
                closed_users[username] = closed_users.get(username, 0) + 1
            else:
                open_pulls += 1

        # sort all users who have had merged prs
        merged_users_str = sort_by_val_return_str(merged_users)

        # sort all users that have closed prs (unmerged)
        closed_users_str = sort_by_val_return_str(closed_users)

        # statistics for that repo
        print 'num_merged,{}'.format(merged)
        print 'merged_users,{}'.format(merged_users_str)
        print 'num_open,{}'.format(open_pulls)
        print 'num_closed,{}'.format(closed)
        print 'closed_users,{}'.format(closed_users_str)
        print ''
Esempio n. 2
0
def print_prs(project_flag):
    """
    Calculates the number of merged and closed pull requests, as well as the high
    frequency pull requesters, and prints to stdout. Takes in a flag, defined by
    constants.py, that determines whether to look at stack repos or ml repos.
    """
    constants = return_constants(project_flag)
    repos = constants["repos"]
    directory = constants["pulls-fpath"]
    for repo_dict in repos:
        repo = repo_dict["name"]
        owner = repo_dict["user"]
        print repo, owner

        # grab pull requests
        pulls = json.load(open(os.path.join(directory, "{}_pulls.json".format(repo))))
        print 'num_pulls,{}'.format(len(pulls))

        # sort
        merged = 0
        closed = 0
        open_pulls = 0
        merged_users = {}
        closed_users = {}
        for p in pulls:
            merged_at = p["merged_at"]
            closed_at = p["closed_at"]
            username = p["user"]["login"]
            # make an exception for systemml
            if closed_at != None and repo == systemml_name:
                merged_at = 1 if is_merged_systemml(p) else None

            # count
            if merged_at != None:
                merged += 1
                merged_users[username] = merged_users.get(username, 0) + 1
            elif closed_at != None:
                closed += 1
                closed_users[username] = closed_users.get(username, 0) + 1
            else:
                open_pulls += 1

        # sort all users who have had merged prs
        merged_users_str = sort_by_val_return_str(merged_users)

        # sort all users that have closed prs (unmerged)
        closed_users_str = sort_by_val_return_str(closed_users)

        # statistics for that repo
        print 'num_merged,{}'.format(merged)
        print 'merged_users,{}'.format(merged_users_str)
        print 'num_open,{}'.format(open_pulls)
        print 'num_closed,{}'.format(closed)
        print 'closed_users,{}'.format(closed_users_str)
        print ''
Esempio n. 3
0
def count(flag):
    global constants_dict
    constants_dict = return_constants(flag)
    for repo in constants_dict["repos"]:
        #print fn
        # obtain a dictionary of commiters: # commits :
        commit_dict = count_commits_per_user(return_filename(repo))
        #print len(commit_dict.keys())
        output_file = "committer_csvs/{}-{}-dict.csv".format(repo["name"], repo["user"])
        with open(output_file, 'w') as csv_file:
            writer = csv.writer(csv_file)
            print "\n {} - {} \n".format(repo["name"], repo["user"])
            for k, v in commit_dict.items():
                writer.writerow([k, v])
                print k + "," + str(v)
def get_pull_requests(project=ML):
    constants = return_constants(project)
    repo_list = constants["repos"]
    output_dir = constants["pulls-fpath"]

    # record the date of data retrieval
    date_file = open(os.path.join(output_dir, "date.txt"), 'w')
    date_file.write(CURRENT_DATE.strftime("%b %d, %Y"))
    date_file.close()

    for repo in repo_list:
        pull_req = get_pulls(repo)
        print "writing data..."
        f = open(os.path.join(output_dir, "{}_pulls.json".format(repo["name"])), "w")
        json.dump(pull_req, f)
        f.close()
        print "done writing data."
def get_pull_requests(project=ML):
    constants = return_constants(project)
    repo_list = constants["repos"]
    output_dir = constants["pulls-fpath"]

    # record the date of data retrieval
    date_file = open(os.path.join(output_dir, "date.txt"), 'w')
    date_file.write(CURRENT_DATE.strftime("%b %d, %Y"))
    date_file.close()

    for repo in repo_list:
        pull_req = get_pulls(repo)
        print "writing data..."
        f = open(
            os.path.join(output_dir, "{}_pulls.json".format(repo["name"])),
            "w")
        json.dump(pull_req, f)
        f.close()
        print "done writing data."
def herf(flag):
    global constants_dict
    constants_dict = return_constants(flag)

    import doctest
    doctest.testmod()

    print 'Overall Herfindahl Indices'
    for item in sorted(calculateOverall().items()):
        print "{:<30}\t{}".format(item[0], item[1])

    print '\nPer Year'
    hiSortedByYear = sorted(calculateYear().items())
    index = 0
    sortedYears = sorted(hiSortedByYear[0][1].keys())
    print "{:<12}".format("Year"),
    for item in hiSortedByYear:
        print "{:<30}".format(item[0]),
    while(index < len(sortedYears)):
        currYear = sortedYears[index]
        print "\n{:<12}".format(currYear),
        for item in hiSortedByYear:
            print "{:<30}".format(item[1][currYear]),
        index+=1
    print ''

    print '\nPer Month'
    hiSortedByMonth = sorted(calculateMonth().items())
    index = 0
    sortedMonths = sorted(hiSortedByMonth[0][1].keys())
    print "{:<12}".format("Month"),
    for item in hiSortedByMonth:
        print "{:<30}".format(item[0]),
    while(index < len(sortedMonths)):
        currMonth = sortedMonths[index]
        print "\n{} {:<7}".format(currMonth/100, currMonth%100),
        for item in hiSortedByMonth:
            print "{:<30}".format(item[1][currMonth]),
        index+=1
def herf(flag):
    global constants_dict
    constants_dict = return_constants(flag)

    import doctest
    doctest.testmod()

    print 'Overall Herfindahl Indices'
    for item in sorted(calculateOverall().items()):
        print "{:<30}\t{}".format(item[0], item[1])

    print '\nPer Year'
    hiSortedByYear = sorted(calculateYear().items())
    index = 0
    sortedYears = sorted(hiSortedByYear[0][1].keys())
    print "{:<12}".format("Year"),
    for item in hiSortedByYear:
        print "{:<30}".format(item[0]),
    while (index < len(sortedYears)):
        currYear = sortedYears[index]
        print "\n{:<12}".format(currYear),
        for item in hiSortedByYear:
            print "{:<30}".format(item[1][currYear]),
        index += 1
    print ''

    print '\nPer Month'
    hiSortedByMonth = sorted(calculateMonth().items())
    index = 0
    sortedMonths = sorted(hiSortedByMonth[0][1].keys())
    print "{:<12}".format("Month"),
    for item in hiSortedByMonth:
        print "{:<30}".format(item[0]),
    while (index < len(sortedMonths)):
        currMonth = sortedMonths[index]
        print "\n{} {:<7}".format(currMonth / 100, currMonth % 100),
        for item in hiSortedByMonth:
            print "{:<30}".format(item[1][currMonth]),
        index += 1
Esempio n. 8
0
def get_commits(projects=ML):
    constants = return_constants(projects) #change this if necessary

    #directory
    dirName = constants["commits-fpath"]

    #getting correct repos
    repo_list = constants['repos']

    # record the date of data retrieval
    date_file = open(os.path.join(dirName, "date.txt"), 'w')
    date_file.write(CURRENT_DATE.strftime("%b %d, %Y"))
    date_file.close()

    #getting commits
    for repo_data in repo_list:
        commits = get_all_commits(repo_data)

        print "writing data..."
        output_file = open(os.path.join(dirName, return_filename(repo_data)), 'w')
        json.dump(commits, output_file)
        output_file.close()
        print "done writing data."
Esempio n. 9
0
def get_commits(projects=ML):
    constants = return_constants(projects)  #change this if necessary

    #directory
    dirName = constants["commits-fpath"]

    #getting correct repos
    repo_list = constants['repos']

    # record the date of data retrieval
    date_file = open(os.path.join(dirName, "date.txt"), 'w')
    date_file.write(CURRENT_DATE.strftime("%b %d, %Y"))
    date_file.close()

    #getting commits
    for repo_data in repo_list:
        commits = get_all_commits(repo_data)

        print "writing data..."
        output_file = open(os.path.join(dirName, return_filename(repo_data)),
                           'w')
        json.dump(commits, output_file)
        output_file.close()
        print "done writing data."
Esempio n. 10
0
Currently not going to extend to stack since this code is not in use.
"""

import repo_info
import json
import sys, os
sys.path.insert(0, os.path.abspath(os.path.join('..')))
from Config.constants import ML, STACK, return_constants


# Module-level state for the contributor/project analysis.

# Relative data directory prefix; only referenced by the disabled code below
# within this view.
# NOTE(review): the name `dir` shadows the Python builtin of the same name.
dir = 'mlcontrib/'
# Disabled: previously read a whitespace-separated list of file names from
# files.txt inside `dir`.
# names = open(dir+"files.txt", 'r').read()
# mlFileNames = names.split()
# print mlFileNames

# Configuration for the ML project set; "repos" holds the repos to process.
constants = return_constants(ML)
reponames = constants["repos"]

# matrix of projects (rows) and contributors (columns)
matrix = []

# collaborator name : index in matrix
allContributors = {}

# (project name, projectOwner) : index in matrix
projects = {}

# parentrepo : [list of repos forked from parent]
forkFamilyTree = {}

###