def test_user_orgs_not_found(self):
    """Test whether a 404 response when getting user orgs is managed"""

    body = read_file('data/github_request')
    login = read_file('data/github_login')
    orgs = read_file('data/github_orgs')

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ISSUES_URL,
                           body=body, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_USER_URL,
                           body=login, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body=orgs, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })

    # Check that a 404 when getting user orgs is managed
    users_orgs = GitHubClient._users_orgs
    GitHubClient._users_orgs = {}  # clean the cache to get orgs using the API

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body=orgs, status=404,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })

    github = GitHub("zhquan_example", "repo", "aaa")
    issues = [issue for issue in github.fetch()]

    # Check that a non-404 error when getting user orgs raises an exception
    GitHubClient._users_orgs = {}

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body=orgs, status=402,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })

    github = GitHub("zhquan_example", "repo", "aaa")
    with self.assertRaises(requests.exceptions.HTTPError) as e:
        issues = [issue for issue in github.fetch()]

    GitHubClient._users_orgs = users_orgs  # restore the cache
def get_github_data(self):
    owner, repository = CONF.repo.split('/')
    repo = GitHub(owner=owner, repository=repository,
                  api_token=CONF.gh_token)
    data = repo.fetch('issue')
    self.check_if_open(data)
    return self.table
def getPRs(user_owner, repo_name, tokens):
    repo = GitHub(owner=user_owner, repository=repo_name,
                  api_token=tokens, sleep_for_rate=True)
    prs = repo.fetch(category="pull_request")
    return prs
def test_fetch_empty(self):
    """Test when an empty list of issues is returned"""

    body = ""
    login = read_file('data/github_login')

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ISSUES_URL,
                           body=body, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_USER_URL,
                           body=login, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body="[]", status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })

    from_date = datetime.datetime(2016, 1, 1)
    github = GitHub("zhquan_example", "repo", "aaa")

    issues = [issue for issue in github.fetch(from_date=from_date)]

    self.assertEqual(len(issues), 0)
def getIssues(user_owner, repo_name, tokens):
    repo = GitHub(owner=user_owner, repository=repo_name,
                  api_token=tokens, sleep_for_rate=True)
    issues = repo.fetch(category="issue")
    return issues
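# Hypothetical usage of the two helpers above. The owner, repository, and
# token are placeholders; recent Perceval releases expect api_token to be a
# list of tokens, which is what these helpers pass through. fetch() returns
# a generator, so list() materializes the items.
issues = list(getIssues('owner', 'repo', ['<api-token>']))
prs = list(getPRs('owner', 'repo', ['<api-token>']))
print(len(issues), 'issues;', len(prs), 'pull requests')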
def test_fetch_more_issues(self):
    """Test when two issues are returned"""

    login = read_file('data/github_login')
    issue_1 = read_file('data/github_issue_1')
    issue_2 = read_file('data/github_issue_2')

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ISSUES_URL,
                           body=issue_1, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '5',
                               'Link': '<' + GITHUB_ISSUES_URL + '/?&page=2>; rel="next", <' +
                                       GITHUB_ISSUES_URL + '/?&page=3>; rel="last"'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_ISSUES_URL + '/?&page=2',
                           body=issue_2, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '5'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_USER_URL,
                           body=login, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '5'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body="[]", status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '5'
                           })

    github = GitHub("zhquan_example", "repo", "aaa")
    issues = [issue for issue in github.fetch()]

    self.assertEqual(len(issues), 2)

    expected_1 = json.loads(read_file('data/github_issue_expected_1'))
    self.assertEqual(issues[0]['origin'], 'https://github.com/zhquan_example/repo')
    self.assertEqual(issues[0]['uuid'], '58c073fd2a388c44043b9cc197c73c5c540270ac')
    self.assertEqual(issues[0]['updated_on'], 1458035782.0)
    self.assertEqual(issues[0]['category'], 'issue')
    self.assertEqual(issues[0]['tag'], 'https://github.com/zhquan_example/repo')
    self.assertDictEqual(issues[0]['data'], expected_1)

    expected_2 = json.loads(read_file('data/github_issue_expected_2'))
    self.assertEqual(issues[1]['origin'], 'https://github.com/zhquan_example/repo')
    self.assertEqual(issues[1]['uuid'], '4236619ac2073491640f1698b5c4e169895aaf69')
    self.assertEqual(issues[1]['updated_on'], 1458054569.0)
    self.assertEqual(issues[1]['category'], 'issue')
    self.assertEqual(issues[1]['tag'], 'https://github.com/zhquan_example/repo')
    self.assertDictEqual(issues[1]['data'], expected_2)
def extract_github(own, repo):
    """Retrieve the issues/pull requests from a GitHub repository."""
    github_obj = GitHub(owner=own, repository=repo,
                        sleep_for_rate=True, sleep_time=300)
    for issue in github_obj.fetch():
        print(issue['data'])
def repo_info(own, repo_url, d1, df, k):
    token = 'XXX'  # placeholder API token

    # Create a GitHub object for the owner/repository pair
    repo = GitHub(owner=own, repository=repo_url, api_token=token)

    # Fetch all issues/pull requests as an iterator, counting how many of
    # each kind are open and recent (according to isrecent)
    pull_open = 0
    open1 = 0
    total = 0
    for issue in repo.fetch():
        data = issue['data']
        if data['state'] != 'open':
            continue
        # created_at looks like "YYYY-MM-DDTHH:MM:SSZ"
        year_str, month_str, day_str = data['created_at'].split("-")
        year = int(year_str)
        month_num = int(month_str)
        day = int(day_str[:2])
        if not isrecent(d1, day, month_num, year):
            continue
        if 'pull_request' in data:
            pull_open += 1
        else:
            open1 += 1

    total += pull_open + open1
    df['Issues Open'][k] = open1
    df['Pull Requests Open'][k] = pull_open
    print("Open Issues", open1)
    print("Pull Requests: ", pull_open)
    print()
    return total
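# A minimal sketch of the isrecent helper assumed by repo_info above; it
# treats d1 as a cutoff datetime, so anything created on or after it counts
# as recent. The real helper may be defined differently.
import datetime

def isrecent(d1, day, month_num, year):
    return datetime.datetime(year, month_num, day) >= d1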
def test_fetch_from_cache(self):
    """Test whether a list of issues is returned from cache"""

    body = read_file('data/github_request')
    login = read_file('data/github_login')

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ISSUES_URL,
                           body=body, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_USER_URL,
                           body=login, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body="[]", status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })

    # First, we fetch the bugs from the server, storing them in a cache
    cache = Cache(self.tmp_path)
    github = GitHub("zhquan_example", "repo", "aaa", cache=cache)

    issues = [issue for issue in github.fetch()]

    # Now, we get the bugs from the cache. The contents should be the
    # same and there won't be any new request to the server
    cache_issues = [cache_issue for cache_issue in github.fetch_from_cache()]

    del issues[0]['timestamp']
    del cache_issues[0]['timestamp']

    self.assertDictEqual(issues[0], cache_issues[0])
    self.assertEqual(len(issues), len(cache_issues))
def test_fetch_from_date(self):
    """Test when fetching issues from a given date"""

    login = read_file('data/github_login')
    body = read_file('data/github_issue_2')

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ISSUES_URL,
                           body=body, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_USER_URL,
                           body=login, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body="[]", status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })

    from_date = datetime.datetime(2016, 3, 1)
    github = GitHub("zhquan_example", "repo", "aaa")

    issues = [issue for issue in github.fetch(from_date=from_date)]

    self.assertEqual(len(issues), 1)

    expected = json.loads(read_file('data/github_issue_expected_2'))
    self.assertEqual(issues[0]['origin'], 'https://github.com/zhquan_example/repo')
    self.assertEqual(issues[0]['uuid'], '4236619ac2073491640f1698b5c4e169895aaf69')
    self.assertEqual(issues[0]['updated_on'], 1458054569.0)
    self.assertEqual(issues[0]['category'], 'issue')
    self.assertEqual(issues[0]['tag'], 'https://github.com/zhquan_example/repo')
    self.assertDictEqual(issues[0]['data'], expected)
def test_fetch(self):
    """Test whether a list of issues is returned"""

    body = read_file('data/github_request')
    login = read_file('data/github_login')
    orgs = read_file('data/github_orgs')

    httpretty.register_uri(httpretty.GET,
                           GITHUB_ISSUES_URL,
                           body=body, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_USER_URL,
                           body=login, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })
    httpretty.register_uri(httpretty.GET,
                           GITHUB_ORGS_URL,
                           body=orgs, status=200,
                           forcing_headers={
                               'X-RateLimit-Remaining': '20',
                               'X-RateLimit-Reset': '15'
                           })

    github = GitHub("zhquan_example", "repo", "aaa")
    issues = [issue for issue in github.fetch()]

    self.assertEqual(len(issues), 1)

    expected = json.loads(read_file('data/github_request_expected'))
    self.assertEqual(issues[0]['origin'], 'https://github.com/zhquan_example/repo')
    self.assertEqual(issues[0]['uuid'], '58c073fd2a388c44043b9cc197c73c5c540270ac')
    self.assertEqual(issues[0]['updated_on'], 1454328801.0)
    self.assertEqual(issues[0]['category'], 'issue')
    self.assertEqual(issues[0]['tag'], 'https://github.com/zhquan_example/repo')
    self.assertDictEqual(issues[0]['data'], expected)
#! /usr/bin/env python3

import argparse

from perceval.backends.core.github import GitHub

# Parse command line arguments
parser = argparse.ArgumentParser(
    description="Simple parser for GitHub issues and pull requests")
parser.add_argument("-t", "--token", help="GitHub token")
parser.add_argument("-r", "--repo", help="GitHub repository, as 'owner/repo'")
args = parser.parse_args()

# Owner and repository names
(owner, repo) = args.repo.split('/')

repo = GitHub(owner=owner, repository=repo, api_token=args.token)

# Print each item (one big dict per issue or pull request)
for item in repo.fetch():
    print(item, '\n')
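# Example invocation of the script above (script name, token, and repository
# are placeholders):
#
#   python3 fetch_github.py -t <token> -r owner/repo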
help="GitHub repository, as 'owner/repo' or gitpath") parser.add_argument( "-d", "--create_dump", help="y for for creating json dump of data, empty for printing to terminal" ) args = parser.parse_args() from_date = datetime.datetime(2019, 6, 1, 0, 0, 0, tzinfo=dateutil.tz.tzutc()) to_date = datetime.datetime(2020, 2, 1, 0, 0, 0, tzinfo=dateutil.tz.tzutc()) (owner, repo) = args.repo.split('/') repo = GitHub(owner=owner, repository=repo, api_token=[str(args.token)], sleep_for_rate=True) issues_gen = repo.fetch(category=CATEGORY_ISSUE, from_date=from_date, to_date=to_date) issues = list(issues_gen) if args.create_dump == 'y': with open("github_issues.json", "w") as file: json.dump(issues, file) else: for issue in issues: for field in issue.keys(): print(str(field) + ':' + str(issue[field])) print("-----------")
#! /usr/bin/env python3

import argparse

from perceval.backends.core.github import GitHub

# Parse command line arguments
parser = argparse.ArgumentParser(
    description="Simple parser for GitHub issues and pull requests")
parser.add_argument("-t", "--token", help="GitHub token")
parser.add_argument("-r", "--repo", help="GitHub repository, as 'owner/repo'")
args = parser.parse_args()

# Owner and repository names
(owner, repo) = args.repo.split('/')

# Create a GitHub object, pointing to the repository
repo = GitHub(owner=owner, repository=repo, api_token=[args.token])

# Fetch all issues/pull requests as an iterator, and iterate it printing
# their number, and whether they are issues or pull requests
for item in repo.fetch():
    if 'pull_request' in item['data']:
        kind = 'Pull request'
    else:
        kind = 'Issue'
    print(item['data']['number'], ':', kind)
parser.add_argument(
    "-fr", "--fromdate",
    help="Date that you want to fetch information from, in format YYYYMMDD")
parser.add_argument(
    "-to", "--todate",
    help="Date that you want to fetch information till, in format YYYYMMDD")
args = parser.parse_args()

# Owner and repository names
(owner, repo) = args.repo.split('/')
repo_git_uri = "http://github.com/{}/{}.git".format(owner, repo)
repo_dir = 'tmp/perceval'

# Convert the from and to dates to datetime objects
fr_dt_tuple = map(int, (args.fromdate[:4], args.fromdate[4:6], args.fromdate[6:]))
fr_dt = datetime.datetime(*fr_dt_tuple)
to_dt_tuple = map(int, (args.todate[:4], args.todate[4:6], args.todate[6:]))
to_dt = datetime.datetime(*to_dt_tuple)

git_obj = Git(uri=repo_git_uri, gitpath=repo_dir)
github_obj = GitHub(owner=owner, repository=repo, api_token=args.token)

# Big dicts are printed; they can be pretty-printed for convenience
for commit in git_obj.fetch():
    print(commit, '\n')
for item in github_obj.fetch(from_date=fr_dt, to_date=to_dt):
    print(item, '\n')
print("Adding includes relation: " + str(commit) + " " + str(file_name)) tx.run("MERGE (a:Commit {id:$commitID}) MERGE (b:File {id:$fileName}) MERGE (a)-[:includes]->(b)", commitID=commit, fileName=file_name) def add_pr_includes_relation(tx, pr_number, file_name): print("Adding pr_includes relation: " + str(pr_number) + " " + str(file_name)) tx.run("MERGE (a:PullRequest {id:$prID}) MERGE (b:File {id:$fileName}) MERGE (a)-[:includes]->(b)", prID=pr_number, fileName=file_name) # Fetch all issues/pull requests from GitHub repo = GitHub(owner=GITHUB_REPO_OWNER, repository=GITHUB_REPO_NAME, api_token=GITHUB_API_TOKEN) for item in repo.fetch(): number = item['data']['number'] author = item['data']['user']['login'] if 'pull_request' in item['data']: session.write_transaction(add_proposes_relation, number, author) # Get files in the pr. Perceval doesn't support so send a request to GitHub API # https://developer.github.com/v3/pulls/#list-pull-requests-files prFilesUrl = GITHUB_API_URL + "/repos/%s/%s/pulls/%s/files" % (GITHUB_REPO_OWNER, GITHUB_REPO_NAME, number) headers = {'Authorization': 'token ' + GITHUB_API_TOKEN} r = requests.get(prFilesUrl, headers=headers) filesInPr = r.json() for file in filesInPr: session.write_transaction(add_pr_includes_relation, number, file['filename']) else:
#! /usr/bin/env python3

from perceval.backends.core.github import GitHub

repo = GitHub(owner='elastic', repository='logstash', api_token='***')

# Fetch all issues/pull requests and print each one
for issue in repo.fetch():
    print(issue)
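# A variant of the loop above that prints only each item's title; Perceval
# wraps the GitHub API payload under the 'data' key.
for issue in repo.fetch():
    print(issue['data']['title'])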
def fetch_github():
    """Fetch the data items (issues, pull requests and repository)
    from a GitHub repository.
    """
    github = GitHub(owner=owner, repository=repository,
                    api_token=[args.token], sleep_for_rate=True)

    # Printing owner and repository
    print("Owner: ", owner)
    print("Repository: ", repository)
    print("Categories: ", GitHub.CATEGORIES)

    # Range of dates between which data is to be fetched
    from_date = datetime.datetime(2020, 3, 8, 0, 0, 0, tzinfo=dateutil.tz.tzutc())
    to_date = datetime.datetime(2020, 3, 9, 0, 0, 0, tzinfo=dateutil.tz.tzutc())

    # Fetch issue data
    issue_list_generator = github.fetch(category=CATEGORY_ISSUE,
                                        from_date=from_date, to_date=to_date,
                                        filter_classified=False)
    issue_list = list(issue_list_generator)

    # Dump the data fetched into a JSON file
    dump_json("./GitHub_backend/github_issue.json", issue_list)

    issue = issue_list[0]

    # Printing some features of the issue
    print('*' * 50)
    print("ISSUE")
    print('Category: ', issue['category'])
    print("Issue Count: ", len(issue_list))
    print('Title: ', issue['data']['title'])
    print('Comments: ', issue['data']['comments'])
    print('Search Fields:', issue['search_fields'])
    print('Timestamp: ', issue['timestamp'])
    print('Updated on: ', issue['updated_on'])
    print('UUID: ', issue['uuid'])
    print('*' * 50)

    # Fetch pull request data
    pr_list_generator = github.fetch(category=CATEGORY_PULL_REQUEST,
                                     from_date=from_date, to_date=to_date,
                                     filter_classified=False)
    pr_list = list(pr_list_generator)

    # Dump the data fetched into a JSON file
    dump_json("./GitHub_backend/github_pr.json", pr_list)

    pr = pr_list[0]

    # Printing some features of the pull request data
    print("PULL REQUEST")
    print('Category: ', pr['category'])
    print("Pull Request Count: ", len(pr_list))
    print('Title: ', pr['data']['title'])
    print('Comments: ', pr['data']['comments'])
    print('Search Fields:', pr['search_fields'])
    print('Timestamp: ', pr['timestamp'])
    print('Updated on: ', pr['updated_on'])
    print('UUID: ', pr['uuid'])
    print('*' * 50)

    # Fetch repository data
    repo_list_generator = github.fetch(category=CATEGORY_REPO,
                                       from_date=from_date, to_date=to_date,
                                       filter_classified=False)
    repo_list = list(repo_list_generator)
    print("Number: ", len(repo_list))

    # Dump the data fetched into a JSON file
    dump_json("./GitHub_backend/github_repo.json", repo_list)

    repo = repo_list[0]

    # Printing some features of the repository data
    print("REPOSITORY")
    print('Category: ', repo['category'])
    print("Repository Count: ", len(repo_list))
    print('Description: ', repo['data']['description'])
    print('Owner: ', repo['data']['owner']['login'])
    # When data is fetched from a repository, item_id contains
    # the timestamp when the data was fetched
    print('Search Fields:', repo['search_fields'])
    print('Timestamp: ', repo['timestamp'])
    print('Updated on: ', repo['updated_on'])
    print('UUID: ', repo['uuid'])
    print('*' * 50)
class GithubImporter(BugTrackerImporter):
    """Specialized importer class for importing bug information from GitHub"""

    users = {}

    def __init__(self, bt_info):
        super().__init__(bt_info)
        url = bt_info.bug_tracker.baseurl
        if url.startswith('https://github.com/'):
            owner = url.replace('https://github.com/', '')
        elif url.startswith('http://github.com/'):
            owner = url.replace('http://github.com/', '')
        else:
            owner = url
        self.backend = GitHub(owner=owner,
                              repository=self.object.product,
                              api_token=bt_info.bug_tracker.api_token,
                              sleep_for_rate=True)

    def get_user(self, userdata):
        username = userdata['login']
        try:
            return self.users[username]
        except KeyError:
            pass
        # Sometimes GitHub sends over these keys with null values
        retval = get_participant(userdata.get('name') or '',
                                 userdata.get('email') or '')
        self.users[username] = retval
        return retval

    @transaction.atomic
    def _run(self):
        issues_iter = self.backend.fetch()

        # Import all issues
        for issue_d in issues_iter:
            issue = issue_d['data']
            closed_at = issue['closed_at']
            if closed_at:
                closed_at = str_to_datetime(issue['closed_at'])
            bug, created = self.object.bugs.get_or_create(
                bug_id=str(issue['number']),
                defaults={
                    'close_date': closed_at,
                    'severity': None
                })
            logger.info("%s bug [%s]", "Imported" if created else "Found", bug)
            if created:
                bug_create_time = str_to_datetime(issue['created_at'])
                comment = bug.comments.create(
                    comment_id='VIRTUAL-1',
                    author=self.get_user(issue['user_data']),
                    timestamp=bug_create_time)
                self.record_timestamp(bug_create_time)
                logger.info("Imported bug body as [%s]", comment)

        # TODO: not supported yet
        comments = []

        # Import comments
        for comment in comments:
            issue_number = os.path.basename(comment['issue_url'])
            bug = self.object.bugs.get(bug_id=issue_number)
            extrafields = {
                'author': self.get_user(comment['user']),
                'timestamp': str_to_datetime(comment['created_at'])
            }
            comment, created = bug.comments.get_or_create(
                comment_id=comment['id'],
                defaults=extrafields)
            logger.info("%s comment [%s]",
                        "Imported" if created else "Found", comment)
            if not created:
                for key, value in extrafields.items():
                    oldvalue = getattr(comment, key)
                    if oldvalue != value:
                        logger.warning(
                            "Updating field [%s] for comment [%s] "
                            "(%s -> %s)", key, comment, oldvalue, value)
                        setattr(comment, key, value)
                self.record_timestamp(comment.timestamp)
            else:
                self.record_timestamp(comment.timestamp)
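# The fragment below assumes a github_backend instance created earlier; a
# minimal sketch of that setup, with placeholder owner, repository, and
# token:
from datetime import datetime
import json

from perceval.backends.core.github import GitHub, CATEGORY_ISSUE

github_backend = GitHub(owner='<owner>', repository='<repo>',
                        api_token=['<api-token>'], sleep_for_rate=True)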
# Categories of information which can be retrieved
print(github_backend.categories)
print(github_backend.repository)
print(github_backend.origin)

# Analyzing issue information

# Datetime range in which issues' information is to be fetched
from_date = datetime(2019, 1, 1)
to_date = datetime(2019, 2, 2)

# Calling the fetch method
range_issues = github_backend.fetch(category=CATEGORY_ISSUE,
                                    from_date=from_date, to_date=to_date)
range_issues_list = list(range_issues)

n_issues = len(range_issues_list)
print("NUMBER OF ISSUES: ", n_issues)

last_issue = range_issues_list[n_issues - 1]
print("Attributes of issue JSON document: ", last_issue.keys())

with open("issue.json", "w") as write_file:
    json.dump(last_issue, write_file)

for issue in range_issues_list:
    print("-" * 100)
    # Issue Title
print("Started " + repo_name) commit_count = 0 issue_count = 0 pull_count = 0 git_commit = Git(uri=repo_url, gitpath=repo_dir) #count no of commits for commit in git_commit.fetch(): date_diff = datetime.now() - datetime.strptime( commit['data']['CommitDate'][:-6], "%a %b %d %H:%M:%S %Y") if date_diff.days <= 90: commit_count += 1 items = GitHub(owner=parent, repository=repo_name, api_token=github_token) #count no of pull_requests,issues for item in items.fetch(): date_diff = datetime.now() - datetime.strptime( item['data']['created_at'], "%Y-%m-%dT%H:%M:%SZ") if date_diff.days <= 90: if 'pull_request' in item['data']: pull_count += 1 else: issue_count += 1 clubbed_data = { 'repo': repo_name, 'commit': commit_count, 'issue': issue_count, 'pull': pull_count, 'total': commit_count + issue_count + pull_count }