# Shared imports used throughout the snippets below (none of the scraped
# examples show them explicitly): csv, datetime, pandas, requests.
import csv
from datetime import datetime

import pandas as pd
import requests


def getcommitinfo(repoid, NEWREPO_xl):
    commit_url = "https://api.github.com/repositories/" + str(
        repoid) + "/commits?per_page=100"
    while commit_url:
        commit_req = getGitHubapi(commit_url, PW_CSV, LOG_CSV)
        if commit_req:
            commit_json = commit_req.json()
            for commit in commit_json:
                commit_row = ghparse_row(commit,
                                         "sha",
                                         "commit*author*name",
                                         "commit*author*email",
                                         "commit*author*date",
                                         "commit*committer*name",
                                         "commit*committer*email",
                                         "commit*committer*date",
                                         "commit*comment_count",
                                         "commit*message",
                                         prespace=1)
                commit_row.extend(getmorecommitinfo(commit['url']))
                # appendrowincsv(NEWREPO_CSV, commit_row)
                appendrowindf(NEWREPO_xl, commit_row)
            commit_url = ghpaginate(commit_req)
        else:
            print("Error getting commit info ", commit_url)
            with open(LOG_CSV, 'at', encoding='utf-8', newline="") as loglist:
                log_handle = csv.writer(loglist)
                log_handle.writerow(
                    ["Error getting commit", commit_url, "UNKNOWN"])
            return 1
    return 0
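These snippets lean on helpers the source never shows: getGitHubapi, ghparse_row, ghpaginate, and appendrowindf. Judging from the call sites, ghpaginate takes the last response and returns the next page's URL or a falsy value. A minimal sketch, assuming GitHub's standard Link header (not the author's implementation):

def ghpaginate(response):
    """Return the rel="next" URL from the Link header, or None when done."""
    link_header = response.headers.get("Link", "")
    for part in link_header.split(","):
        section = part.split(";")
        if len(section) < 2:
            continue
        if section[1].strip() == 'rel="next"':
            return section[0].strip().strip("<>")   # URL is wrapped in <...>
    return None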
Example #2
def getcommitinfo(repoid, write_handle):
    commit_url = "https://api.github.com/repositories/" + str(repoid) + "/commits?per_page=100"
    while commit_url:
        commit_req = getGitHubapi(commit_url, PW_CSV, LOG_CSV)
        if commit_req:
            commit_json = commit_req.json()
            for commit in commit_json:
                commit_row = ghparse_row(commit,
                                         "sha",
                                         "commit*author*name",
                                         "commit*author*email",
                                         "commit*author*date",
                                         "commit*committer*name",
                                         "commit*committer*email",
                                         "commit*committer*date",
                                         "commit*message",
                                         "commit*comment_count",
                                         "commit*verification",
                                         "url",
                                         "parents",
                                         prespace=1)
                write_handle.writerow(commit_row)
            commit_url = ghpaginate(commit_req)
        else:
            print("Error getting commit info ", commit_url)
            with open(LOG_CSV, 'at', encoding='utf-8', newline="") as loglist:
                log_handle = csv.writer(loglist)
                log_handle.writerow(["Error getting commit", commit_url, "UNKNOWN"])
            return
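A hypothetical call to the csv-writing variant above; the repo id and output file name are placeholders, not values from the source:

if __name__ == "__main__":
    # illustrative only: any numeric GitHub repository id works here
    with open("commits.csv", 'wt', encoding='utf-8', newline='') as out:
        getcommitinfo(123456, csv.writer(out))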
def getmorecommitinfo(c_url):
    """Get data on individual commit"""
    commit_res = getGitHubapi(c_url, PW_CSV, LOG_CSV)

    if commit_res is None:
        print("No commit information available", c_url)
        # the success path yields 7 cells (2 stats + parents + 4 file columns)
        return [""] * 7
    commit_json = commit_res.json()
    commit_row = ghparse_row(commit_json,
                             "stats*total",
                             "stats*additions",
                             prespace=0)
    # number of parent commits
    commit_row.append(len(commit_json['parents']))

    files = commit_json['files']
    f_name = [file['filename'] for file in files]
    f_stat = [file['status'] for file in files]
    f_pat = [file.get('patch', "") for file in files]

    commit_row.append(len(files))   # number of files touched
    commit_row.append(f_name)
    commit_row.append(f_stat)
    commit_row.append(f_pat)
    return commit_row
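ghparse_row is also left undefined by the source. The star-delimited paths ("commit*author*name") suggest it walks nested JSON keys, and prespace appears to prepend empty cells. A sketch consistent with that usage (an assumption, not the original helper):

def ghparse_row(json_obj, *fields, prespace=0):
    """Flatten nested JSON into one row, one cell per star-delimited path."""
    row = [""] * prespace            # assumed: leading empty cells
    for field in fields:
        value = json_obj
        for key in field.split("*"):
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                value = ""           # missing or null key: empty cell
                break
        row.append(value)
    return row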
Example #4
def main():
    global DF_REPO
    global DF_COUNT
    search_key = ['us']  # ,'usa','states','america','canada','california','ca']
    period = ['2018-05-31..2019-01-01','2018-01-01..2018-06-01',
              '2017-05-31..2018-01-01','2017-01-01..2017-06-01',
              '2016-05-31..2017-01-01','2016-01-01..2016-06-01',
              '2015-05-31..2016-01-01','2015-01-01..2015-06-01',
              '2014-05-31..2015-01-01','2014-01-01..2014-06-01',
              '2013-05-31..2014-01-01','2013-01-01..2013-06-01',
              '2012-05-31..2013-01-01','2012-01-01..2012-06-01',
              '2011-05-31..2012-01-01','2011-01-01..2011-06-01',
              '2010-05-31..2011-01-01','2010-01-01..2010-06-01',
              '2009-05-31..2010-01-01','2009-01-01..2009-06-01',
              '2008-05-31..2009-01-01','2008-01-01..2008-06-01',]

    for loc in search_key:
        for p in period:
            search_url = ("https://api.github.com/search/users?q=repos:%3E5"
                          "+location%3A" + loc + "+created:" + p +
                          "+type:user&per_page=100")

            while search_url:
                user_res = getGitHubapi(search_url, PW_CSV, LOG_CSV)
                if user_res is None:
                    # guard against a failed request before calling .json()
                    print("Error searching users ", search_url)
                    break
                user_json = user_res.json()

                if int(user_json['total_count']) > 1000:
                    with open(LOG_CSV, 'at') as logobj:
                        l_data = list()
                        log = csv.writer(logobj)
                        l_data.append("Search Results Exceeds 1000")
                        l_data.append(search_url)
                        l_data.append(user_json['total_count'])
                        log.writerow(l_data)
                    print(search_url,"  ",user_json['total_count'])
                for user in user_json["items"]:
                    user_row = ghparse_row(user,"login", "id")
                    appendrowindf(user_xl, user_row)
                search_url = ghpaginate(user_res)
                
    if DF_COUNT < MAX_ROWS_PERWRITE:
        # flush any rows still buffered in DF_REPO to the workbook
        df = pd.read_excel(user_xl, header=0)
        df = df.append(DF_REPO, ignore_index=True)
        df.to_excel(user_xl, index=False)
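appendrowindf is another helper the source omits. The DF_REPO/DF_COUNT globals and the final flush in main() suggest it buffers rows in a DataFrame and rewrites the workbook every MAX_ROWS_PERWRITE rows. A sketch under those assumptions (the buffering policy is a guess):

def appendrowindf(xl_path, row):
    """Buffer a row in DF_REPO; flush to the Excel file when the buffer fills."""
    global DF_REPO, DF_COUNT
    DF_REPO = DF_REPO.append(pd.Series(row), ignore_index=True)
    DF_COUNT += 1
    if DF_COUNT >= MAX_ROWS_PERWRITE:
        df = pd.read_excel(xl_path, header=0)
        df = df.append(DF_REPO, ignore_index=True)
        df.to_excel(xl_path, index=False)
        DF_REPO = DF_REPO.iloc[0:0]   # empty the buffer, keep columns
        DF_COUNT = 0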
def getcommitinfo(repoid, NEWREPO_xl, owner, name):
    commit_url = "https://api.github.com/repositories/" + str(
        repoid) + "/commits?per_page=100"
    while commit_url:
        commit_req = getGitHubapi(commit_url, PW_CSV, LOG_CSV)
        if (commit_req is None) and (
                commit_url == "https://api.github.com/repositories/" +
                str(repoid) + "/commits?per_page=100"):
            print("Repo ID did not work. Trying owner/name")
            commit_url = "https://api.github.com/repos/" + str(
                owner) + "/" + str(name) + "/commits?per_page=100"
            commit_req = getGitHubapi(commit_url, PW_CSV, LOG_CSV)
        if commit_req:
            commit_json = commit_req.json()
            for commit in commit_json:
                commit_row = ghparse_row(commit,
                                         "sha",
                                         "commit*author*name",
                                         "commit*author*email",
                                         "commit*author*date",
                                         "commit*committer*name",
                                         "commit*committer*email",
                                         "commit*committer*date",
                                         "commit*comment_count",
                                         "commit*message",
                                         prespace=1)
                commit_datetime = datetime.strptime(
                    commit['commit']['author']['date'], "%Y-%m-%dT%H:%M:%SZ")
                if commit_datetime.year < 2016:
                    commit_row.extend(getmorecommitinfo(commit['url']))
                appendrowindf(NEWREPO_xl, commit_row)
            commit_url = ghpaginate(commit_req)
        else:
            print("Error getting commit info ", commit_url)
            with open(LOG_CSV, 'at', encoding='utf-8', newline="") as loglist:
                log_handle = csv.writer(loglist)
                log_handle.writerow(
                    ["Error getting commit", commit_url, "UNKNOWN"])
            return 1
    return 0
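getGitHubapi itself is never shown either. Every call site expects an authenticated GET that returns the requests.Response on success and None on failure, logging errors to LOG_CSV. A minimal sketch under those assumptions (credential handling and the lack of rate-limit back-off are guesses):

def getGitHubapi(url, pw_csv, log_csv):
    """GET a GitHub API URL; return the response on HTTP 200, else None."""
    token = open(pw_csv).readline().strip()   # assumed: token on the first line
    res = requests.get(url, headers={"Authorization": "token " + token})
    if res.status_code == 200:
        return res
    with open(log_csv, 'at', encoding='utf-8', newline="") as loglist:
        csv.writer(loglist).writerow(["API error", url, res.status_code])
    return None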
Example #6
def getrepoinfo(NEWREPO_CSV):
    """Update the repo information with PUSHED, STARS, SUBSCRIBERS, FORKS, SIZE, LICENSE."""
    with open(NEWREPO_CSV, 'wt', encoding='utf-8', newline='') as writelist:
        write_handle = csv.writer(writelist)
        for org in org_list:
            repo_url = 'https://api.github.com/orgs/' + org + '/repos?per_page=100&page=1'
            while repo_url:
                repoid_req = getGitHubapi(repo_url, PW_CSV, LOG_CSV)
                # print(repoid_req.headers['link'])
                if repoid_req:
                    repo_json = repoid_req.json()
                    for repo in repo_json:
                        repo_row = ghparse_row(repo,
                                               "id", "full_name", "description",
                                               "fork", "url", "created_at",
                                               "updated_at", "pushed_at",
                                               "homepage", "size",
                                               "stargazers_count",
                                               "watchers_count", "language",
                                               "has_issues", "has_projects",
                                               "has_downloads", "has_wiki",
                                               "has_pages", "forks_count",
                                               "mirror_url", "archived",
                                               "disabled", "open_issues_count",
                                               "license*name", "forks",
                                               "open_issues", "watchers",
                                               "default_branch", "permissions")
                        write_handle.writerow(repo_row)
                        # get commits
                        # getcommitinfo(repo['id'], write_handle)
                        # end get commits
                    repo_url = ghpaginate(repoid_req)
                else:
                    break
                print(repo_url)
Example #8
def run_query(loc, period, user_xl):
    """A simple function to use requests.post to make the API call. Note the json= section."""
    q = "location:" + loc + " repos:>5 created:" + period

    query = """
    query{
      search(query: \""""+q+"""\", type: USER, first: 1) {
        userCount
        pageInfo {
          startCursor
          hasNextPage
        }}}"""    
    req_json = None
    try:
        request = requests.post('https://api.github.com/graphql',
                                json={'query': query}, headers=headers)
        req_json = request.json()
        endc = req_json['data']['search']['pageInfo']['startCursor']
    except Exception:
        print("Error getting starting cursor")
        print(req_json)
        return 404
    
    end = False
    while not end:
        query = """
query($cursor:String! ) {
  rateLimit {
    cost
    remaining
    resetAt
  }
  search(query: \""""+q+"""\", type: USER, first: 100, after:$cursor) {
    userCount
    pageInfo {
      endCursor
      hasNextPage
    }
    edges {
      node {
        ... on User {
          login
          name
          id
          email
          company
          bio
          location
          createdAt
          isHireable
          followers {
            totalCount
          }
          following {
            totalCount
          }
          repositories {
            totalCount
          }
          sponsorsListing {
            createdAt
            shortDescription
            name
            tiers(first: 100) {
              totalCount
              edges {
                node {
                  name
                  description
                  monthlyPriceInDollars
                  updatedAt
                }
              }
            }
          }
          sponsorshipsAsMaintainer(first: 100) {
            totalCount
            nodes {
              createdAt
              sponsor {
                login
              }
            }
          }
        }
      }
    }
  }
}
"""
        variables = {"cursor": endc}

        body = {"query": query, "variables": variables}
        try:
            request = requests.post('https://api.github.com/graphql',
                                    json=body, headers=headers)
            req_json = request.json()
            print(loc, " ", period, " ", req_json['data']['search']['userCount'])
            if int(req_json['data']['search']['userCount']) > 1000:
                # log if the total user count is greater than 1000
                with open(LOG_CSV, 'at') as logobj:
                    log = csv.writer(logobj)
                    l_data = list()
                    l_data.append("Search Results Exceeds 1000")
                    l_data.append(loc)
                    l_data.append(period)
                    l_data.append(req_json['data']['search']['userCount'])
                    log.writerow(l_data)
            print(req_json['data']['rateLimit']['remaining'])
        except Exception:
            print("Error running graphql")
            end = True
            print(req_json)
            return 404
        
        if req_json['data']['search']['pageInfo']['hasNextPage']:
            endc = req_json['data']['search']['pageInfo']['endCursor']
        else:
            end = True
            
        users = req_json['data']['search']['edges']

        for user in users:
            user_row = list()
            if user['node']:
                user_row = ghparse_row(user, "node*login", "node*name", "node*email",
                                       "node*company", "node*bio", "node*location",
                                       "node*createdAt", "node*isHireable",
                                       "node*followers*totalCount",
                                       "node*following*totalCount",
                                       "node*repositories*totalCount")
                if user['node']['sponsorsListing']:
                    user_row.append(user['node']['sponsorsListing']['createdAt'])
                    user_row.append(user['node']['sponsorsListing']['shortDescription'])
                    user_row.append(user['node']['sponsorsListing']['name'])
                    user_row.append(user['node']['sponsorsListing']['tiers']['totalCount'])
                    user_row.append(user['node']['sponsorsListing']['tiers']['edges'])
                    user_row.append(user['node']['sponsorshipsAsMaintainer']['totalCount'])
                    user_row.append(user['node']['sponsorshipsAsMaintainer']['nodes'])
                else:
                    # pad so every row has the same number of columns
                    user_row.extend([""] * 7)
            appendrowindf(user_xl, user_row)
        

    return 0
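run_query reads a module-level headers dict that the source never defines. GitHub's GraphQL endpoint authenticates with a bearer token, so a plausible definition plus a sample call might be (the token source and the arguments are illustrative):

if __name__ == "__main__":
    # assumed: PW_CSV holds the token on its first line
    headers = {"Authorization": "bearer " + open(PW_CSV).readline().strip()}
    run_query('us', '2018-01-01..2018-06-01', user_xl)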
        sp_url = row[0].split('/')
        if len(sp_url) > 5:
            commit_url = 'https://api.github.com/repos/' + sp_url[
                3] + '/' + sp_url[4] + '/commits/' + sp_url[6]

            commit_req = getGitHubapi(commit_url, PW_CSV, LOG_CSV)
            if commit_req:
                commit = commit_req.json()

                commit_row = ghparse_row(commit,
                                         "sha",
                                         "commit*author*date",
                                         "commit*message",
                                         "commit*comment_count",
                                         "commit*author*name",
                                         "commit*author*email",
                                         "commit*committer*date",
                                         "commit*url",
                                         "parents",
                                         "commit*verification",
                                         "stats",
                                         "files",
                                         prespace=0)
                commit_row[-1] = len(commit_row[-1])   # keep only the file count
                df = df.append(pd.Series(commit_row),
                               sort=False,
                               ignore_index=True)

            else:
                print("Error getting commit info ", commit_url)
                # list.append takes one argument; build the placeholder row directly
                commit_row = ['', '', '']