def parse_file_commits():
    """Append per-commit rows to commits.csv for every notebook file.

    Reads repo full names from notebooks.csv (column 'full name'), lists each
    repo's files, and for every file whose path contains '.ipynb' with more
    than two commits, writes one row per commit (repo, file, message) to
    commits.csv.

    Side effects: appends to commits.csv; prints each processed notebook path.
    """
    api = GitHubAPI()
    # newline='' is required by the csv module when opening files it writes,
    # otherwise blank rows appear on Windows.
    with open('notebooks.csv', newline='') as repo_file, \
            open('commits.csv', mode='a', newline='') as commit_file:
        writer = csv.DictWriter(commit_file,
                                fieldnames=['repo', 'file', 'message'])
        for row in csv.DictReader(repo_file):
            repo = row['full name']
            # Iterate paths directly instead of indexing with range(len(...)).
            for path in get_files(repo):
                # NOTE(review): substring match also hits names such as
                # 'x.ipynb_checkpoints'; endswith('.ipynb') may be the intent
                # — kept as-is to preserve behavior.
                if '.ipynb' not in path:
                    continue
                commits = api.file_commits(repo, path)
                if len(commits) > 2:
                    for commit in commits:
                        writer.writerow({
                            'repo': repo,
                            'file': path,
                            'message': commit['commit']['message'],
                        })
                    print(path)
                    print()
def get_files(repo):
    """Return the path of every tree entry reachable from the repo's tree SHAs.

    Uses get_sha_list(repo) to obtain the root tree plus its immediate
    subtrees, then collects the 'path' of each entry in every listing.
    """
    api = GitHubAPI()
    paths = []
    for tree_sha in get_sha_list(repo):
        listing = api.get_file(repo, tree_sha)
        paths.extend(entry['path'] for entry in listing['tree'])
    return paths
def get_all_repos(startDate, endDate, timeWindow, condition):
    """Query GitHub for Jupyter Notebook repos in successive time windows.

    Steps from startDate to endDate in increments of timeWindow hours,
    printing each window's search result. Writes the csv header for
    notebooks.csv before querying.

    BUG FIX: the original inner loop was `while condition:` — `condition`
    is never modified inside the loop, so the function spun forever when it
    was truthy (and the outer loop spun forever doing nothing when it was
    falsy, since startDate never advanced). The flag is now checked once up
    front and the date window advances on every outer iteration.

    :param startDate: datetime, inclusive start of the scan.
    :param endDate: datetime, inclusive end of the scan.
    :param timeWindow: window size in hours.
    :param condition: when falsy, do nothing.
    """
    if not condition:
        return
    # newline='' per the csv module docs (avoids blank rows on Windows).
    with open('notebooks.csv', mode='a', newline='') as csv_file:
        fieldnames = [
            'id', 'full name', 'created at', 'size', 'forks count', 'authors'
        ]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        api = GitHubAPI()
        change = timedelta(hours=timeWindow)
        count = 1
        while startDate <= endDate:
            newStart = startDate + change
            # GitHub's API wants ISO-8601 timestamps: swap the space that
            # str(datetime) produces for a 'T'.
            repository = api.get_repo("Jupyter%20Notebook",
                                      str(startDate).replace(" ", "T"),
                                      str(newStart).replace(" ", "T"))
            print(repository)
            count = count + 1
            startDate = newStart
def get_sha_list(repo):
    """Return the most recent commit's root-tree SHA followed by the SHAs of
    that tree's immediate subtree entries (one level deep, not recursive)."""
    api = GitHubAPI()
    latest = api.get_commit(repo)[0]
    root_sha = latest['commit']['tree']['sha']
    shas = [root_sha]
    root_listing = api.get_file(repo, root_sha)
    for entry in root_listing['tree']:
        if entry['type'] == "tree":
            shas.append(entry['sha'])
    return shas
def get_commits(keyword):
    """Write to commits.csv every commit whose message contains `keyword`.

    Iterates the repos listed in notebooks.csv (column 'full name'), scans
    each repo's commits, and for any commit message containing the keyword
    appends a (repo, message) row to commits.csv. Commas in the message are
    replaced with '.' (the original author's CSV-safety measure) and the
    match is echoed to stdout.
    """
    api = GitHubAPI()
    # newline='' per the csv module docs for files csv writes to.
    with open('notebooks.csv', newline='') as csvfile, \
            open('commits.csv', mode='a', newline='') as csvfile2:
        writer = csv.DictWriter(csvfile2, fieldnames=['repo', 'message'])
        for row in csv.DictReader(csvfile):
            repo = row['full name']
            for commit in api.repo_commits(repo):
                # Fixed: test the key directly instead of stringifying the
                # whole commit dict and searching for the substring
                # '"message"' (which also matched nested keys and depended
                # on quote mangling). Unused `count` removed.
                if 'message' in commit and keyword in commit['message']:
                    message = str(commit['message']).replace(',', '.')
                    print(repo + ': ' + message)
                    writer.writerow({'repo': repo, 'message': message})
from github_api import GitHubAPI

if __name__ == "__main__":
    api = GitHubAPI()

    # Direct request example (commits of a pull request):
    #   res = api.request("repos/jquery/jquery/pulls/4406/commits")
    #
    # Issue/PR timeline example — API doc:
    # https://developer.github.com/v3/issues/timeline/#list-timeline-events-for-an-issue
    # The following queries the events for https://github.com/jquery/jquery/pull/4406/
    #   events = api.get_issue_pr_timeline("jquery/jquery", 4406)

    # Search Jupyter Notebook repositories created during 2008.
    res = api.get_repo("Jupyter%20Notebook", "2008-01-01", "2009-01-01")
import pytest
import csv

from github_api import GitHubAPI


def _load_creds(path):
    """Load (login, password) rows from a tab-separated file, skipping the
    header row."""
    # `with` closes the file promptly; the original left both files open for
    # the lifetime of the interpreter.
    with open(path, newline='') as f:
        return list(csv.reader(f, delimiter='\t', quotechar="'"))[1:]


positive_creds = _load_creds('positive_logins.csv')
negative_creds = _load_creds('negative_logins.csv')

github = GitHubAPI()


@pytest.mark.parametrize("login,password", positive_creds)
def test_positive_login(login, password):
    """Every credential pair in positive_logins.csv must authenticate."""
    assert github.try_login(login, password)


@pytest.mark.parametrize("login,password", negative_creds)
def test_negative_login(login, password):
    """Every credential pair in negative_logins.csv must be rejected."""
    assert not github.try_login(login, password)
def test_get_repos_quantity(self):
    """User Liam-Brew should expose exactly 8 repositories."""
    api = GitHubAPI("Liam-Brew")
    repo_count = len(api.get_repos())
    self.assertEqual(repo_count, 8, "There should be 8 repos present")
def test_nonexistant_user(self):
    """run() should report HTTP 404 for a user that does not exist."""
    missing_user = GitHubAPI("adwdawwadwwdadwadwwdawadwadddddddddddddddddddd")
    self.assertEqual(missing_user.run(), 404, "This user is not on GitHub")
def test_run_status_ok(self):
    """run() should report HTTP 200 for an existing user."""
    api = GitHubAPI("Liam-Brew")
    status = api.run()
    self.assertEqual(status, 200, "Run status should be ok")
def test_specific_get_commits(self):
    """Repo CS-135 should report 6 commits once repos have been fetched."""
    api = GitHubAPI("Liam-Brew")
    api.get_repos()  # get_commits() depends on the repo list being loaded
    commit_counts = api.get_commits()
    self.assertEqual(commit_counts.get('CS-135'), 6,
                     'Repo CS-135 should have 6 commits')
def test_get_repos_names(self):
    """get_repos() should return the known repository names."""
    gh = GitHubAPI("Liam-Brew")
    expected = ['CS-135', 'CS-284', 'GitHubApi567', 'SSW-215',
                'SSW-322', 'SSW-345', 'SSW-567', 'Triangle567']
    # Fixed: the third argument to assertEqual is the failure *message*;
    # the original passed a duplicate copy of the expected list there.
    self.assertEqual(gh.get_repos(), expected,
                     "get_repos() returned an unexpected repo name list")
return False return False def get_source_of_cross_reference(repo, issue_id): # 33773 is a cross-reference event url = "repos/%s/issues/%s/events" % (repo, issue_id) events = api.request(url, paginate=True, state='all') for event in events: # print('repo: ' + repo + ' issue: ' + str(issue_id) + ' event: ' + event['event']) if event['event'] == 'cross-referenced': print(event['source'].get('url')) if __name__ == "__main__": api = GitHubAPI() # # query github api with URL # # res = api.request("repos/jquery/jquery/pulls/4406/commits") # # # query issue/pr timeline # events = api.get_issue_pr_timeline("jquery/jquery", 4406) # is_from_same_organization('https://github.com/facebook/react/issues/14981', 'https://github.com/facebook/redux-toolkit/issues/331') with open('data/repoList_python.txt') as f: repos = [line.rstrip() for line in f] # repos = ['loopj/android-async-http', 'Smoothieware/Smoothieware', 'mongodb/node-mongodb-native', 'python/cpython', 'triketora/women-in-software-eng', 'd3/d3', 'RestKit/RestKit', 'Atom/atom', 'D-Programming-Language/dub', 'mjmlio/mjml', 'nodejs/node'] repos = random.sample(repos, 20) # repos.append('nodejs/node') print('403 ', repos)
from github_api import GitHubAPI
from datetime import date
import datetime
from read_repos import get_all_repos
from get_commits import get_commits, parse_file_commits
import csv
from get_production_nb import get_production_nb_byEXT, get_production_nb_byFEAT

if __name__ == "__main__":
    api = GitHubAPI()

    # Collect all Jupyter Notebook repos from a start date to an end date,
    # stepping the window forward each iteration:
    # print('Getting repos...')
    # get_all_repos(datetime.datetime(2019, 2, 3), datetime.datetime(2019, 6, 1), 6, True)
    # print('done!')

    # Fetch the commits of a single repository:
    # print('Getting Commits...')
    # api.repo_commits("hdong3030/Data-Analysis")
    # print('done!')

    # Select notebooks from the csv file whose commit messages contain a
    # keyword (passed to get_commits):
    # print('Getting Commits...')
    # get_commits("ready")
    # print('done!')

    # Collect per-file commits with a filter:
    # print('Getting Commits...')
    # parse_file_commits()
    # print('done!')