class GitHubDB(object):
    """Rate-limit-aware wrapper around the github3 client that caches user
    lookups (positive and negative) in a local database session."""

    def __init__(self, ghtoken):
        """Create the Github API handle, authenticated when a token is given.

        :param ghtoken: Github API token; ``None`` or ``''`` falls back to
            anonymous access (severely rate limited).
        """
        # Get handle to Github API
        if ghtoken is not None and ghtoken != '':
            self.gh = login(token=ghtoken)
        else:
            log.warning('Using unauthenticated access to Github API. This will result in severe rate limiting.')
            self.gh = GitHub()

    def waitForRateLimit(self, resourceType):
        """Block until the given API resource has credits remaining.

        resourceType can be 'search' or 'core'.  A search call needs only one
        credit; core lookups are done in small bursts, so keep a margin of 12.
        Connection errors while querying the limit are retried in a loop
        (the previous recursive retry could grow the stack without bound on a
        flaky network).
        """
        threshold = 1 if resourceType == 'search' else 12
        while True:
            try:
                rateLimitInfo = self.gh.rate_limit()['resources']
                while rateLimitInfo[resourceType]['remaining'] < threshold:
                    # Sleep until the reported reset timestamp, at least 1s.
                    waitTime = max(1, rateLimitInfo[resourceType]['reset'] - time.time())
                    log.warning('Waiting %s seconds for Github rate limit...', waitTime)
                    time.sleep(waitTime)
                    rateLimitInfo = self.gh.rate_limit()['resources']
                return
            except ConnectionError:
                log.error("Connection error while querying GitHub rate limit. Retrying...")

    def refreshGithubUser(self, ghUserObject):
        """Fetch the full user record, waiting for the core rate limit first."""
        self.waitForRateLimit('core')
        return ghUserObject.refresh(True)

    def getGithubUserForLogin(self, login, session):
        """Uses the Github API to find the user for the given username.

        Results (including "no such user") are cached in GitHubUserCache via
        the given database session to avoid hitting the rate limit.
        Returns NullObject if the user was not found for any reason.
        """
        # Try to use cached result to avoid hitting rate limit
        cachedUser = session.query(GitHubUserCache).filter(GitHubUserCache.login == login).first()
        if cachedUser is not None:
            return cachedUser if not cachedUser.fake else NullObject()
        log.debug('Querying GitHub API for login %s', login)
        try:
            self.waitForRateLimit('core')
            potentialUser = self.gh.user(login)
            if potentialUser is None:
                # store login as fake
                session.add(GitHubUserCache(login=login, fake=True))
                return NullObject()
            actualUser = self.refreshGithubUser(potentialUser)
            # NOTE(review): gh.user() appears to signal "unknown login" with a
            # NullObject rather than None — confirm against the github3 version
            # in use; refreshing a NullObject stays Null, so actualUser is safe
            # to return in that branch.
            if isinstance(potentialUser, NullObject):
                # store login as fake
                session.add(GitHubUserCache(login=login, fake=True))
            else:
                # cache user
                session.add(GitHubUserCache(login=login, name=actualUser.name,
                                            email=actualUser.email,
                                            company=actualUser.company,
                                            location=actualUser.location))
            return actualUser
        except ConnectionError:
            log.error("github query failed when attempting to verify username %s", login)
            return NullObject()

    def searchGithubUsers(self, query):
        """Run a user search, waiting for the search rate limit first."""
        self.waitForRateLimit('search')
        return self.gh.search_users(query)
def is_github_api_limit_reached(e: github3.GitHubError, gh: github3.GitHub) -> bool:
    """Print diagnostics for a github exception and report credit exhaustion.

    Returns
    -------
    out_of_api_credits
        A flag to indicate that the api limit has been exhausted
    """
    print(e)
    print(e.response)
    print(e.response.url)

    try:
        core = gh.rate_limit()["resources"]["core"]
    except Exception:
        # if we can't connect to the rate limit API, let's assume it has been reached
        return True

    if core["remaining"] != 0:
        return False

    # Exhausted: report when the quota comes back, in UTC.
    print("API timeout, API returns at")
    reset_stamp = datetime.datetime.utcfromtimestamp(core["reset"])
    print(reset_stamp.strftime("%Y-%m-%dT%H:%M:%SZ"))
    return True
def _get_files(owner, repo, sha, tokens):
    """Return the repository's file paths at commit ``sha``.

    Tries the GitHub Tree API first and falls back to the Contents API when
    the tree response is truncated.
    """
    # TODO: use other tokens if first fails
    api = GitHub(token=tokens[0])
    repository = api.repository(owner, repo)

    # Prefer the single-call Tree API; a None result means the tree was
    # truncated, so walk it with the Contents API instead.
    files = _get_files_tree_api(repository, sha)
    if files is None:
        files = _get_files_contents_api(repository, sha)

    log.debug('Remaining GitHub API calls: %s', api.rate_limit()['rate']['remaining'])
    return files
def get_session() -> GitHub:
    """Return the process-wide GitHub client, creating and caching it on
    first use and printing the current rate-limit state."""
    global GITHUB_SESSION
    if GITHUB_SESSION is None:
        # Increase read timeout for creating PRs with long bodies.
        sess = github3.session.GitHubSession(default_read_timeout=30)
        gh = GitHub(token=CONFIG['github_token'], session=sess)

        rate_limit = gh.rate_limit()['rate']
        limit = rate_limit['limit']
        remaining = rate_limit['remaining']
        reset = datetime.utcfromtimestamp(rate_limit['reset'])
        print(f"{remaining}/{limit} rate limit remaining")
        print(f"Reset at {reset} UTC (in {reset - datetime.utcnow()})")

        GITHUB_SESSION = gh
    return GITHUB_SESSION
class GitHubAdaptor(object):
    """ thin wrapper over github3 with the purpose of importing [trac] tickets

    Python 2 code (bare print statements).  Maps trac users/emails to GitHub
    logins, caching the mapping on disk, and creates GitHub issues from
    ticket dicts.
    """
    def __init__(self, config, dry_run=False, only_from_cache=False):
        # config keys read here: mapping, template, token, owner, repository,
        # upstream_owner, upstream_repository, and optionally user_cache.
        self._dry_run = dry_run
        self.only_from_cache = only_from_cache
        self._mapping = config['mapping']
        self._template = config['template']
        self._gh = GitHub(token=config['token'])
        # Everything is done via _repo
        self._repo = self._gh.repository(config['owner'], config['repository'])
        self._upstream_repo = self._gh.repository(
            config['upstream_owner'], config['upstream_repository'])
        # get current set of available milestones
        self._milestones = dict({
            milestone.title: milestone.number
            for milestone in self._repo.iter_milestones()
        })
        self._users = dict()
        self._user_cache = config.get('user_cache', None)
        self._load_user_cache()

    def __del__(self):
        """ save currently known user mapping """
        # NOTE(review): relies on __del__ firing at interpreter exit to
        # persist the cache — not guaranteed; an explicit save would be safer.
        if self._user_cache is not None:
            with open(self._user_cache, 'w') as user_cache:
                dump(self._users, user_cache)

    def _load_user_cache(self):
        """ load users that are already handled in a previous attempt """
        if self._user_cache is not None and os.path.isfile(self._user_cache):
            with open(self._user_cache) as user_cache:
                tempo = load(user_cache)
                assert isinstance(tempo, dict)
                self._users = tempo
        # Explicit config mapping always wins over cached lookups.
        self._users.update(self._mapping)

    def ensure_milestone(self, name):
        """ check if the given milestone is known already and if it's not create it

        Returns the milestone number.
        """
        num = self._milestones.get(name, None)
        if num is None:
            milestone = self._repo.create_milestone(name)
            num = self._milestones[name] = milestone.number
        return num

    def find_user_in_commits(self, email):
        """ find a user using the commit api.

        This helps to find more users, as the email is not always public for
        search api also this helps with rate limits on search api.
        Returns the GitHub login, or None when no commit matches the email.
        """
        if email in self._users:
            return self._users[email]
        gh_user = None
        for commit in self._upstream_repo.iter_commits(author=email, number=1):
            if commit.author is None:
                # Commit not linked to a GitHub account; fall back to a
                # full-name search on the git author metadata.
                print email, commit.commit.author, "https://github.com/buildbot/buildbot/commit/" + commit.sha
                q = 'fullname:"{}"'.format(commit.commit.author['name'])
                result = list(self._gh.search_users(q))
                if len(result) == 1:
                    gh_user = result[0].user.login
                else:
                    # Ambiguous: just report the candidates.
                    print " ".join([r.user.login for r in result]), "possibilities"
                self.wait_rate_limits()
            else:
                gh_user = commit.author.login
            if gh_user is not None:
                print "found mapping for", email, ":", gh_user
                self._users[email] = gh_user
                return gh_user
        print "email not found in repositorie's authors", email
        return None

    def find_users(self, emails):
        # Resolve emails to logins via the search API; returns the emails
        # that could not be mapped unambiguously.
        not_mapped_users = []
        for email in emails:
            q = '{} in:email'.format(email)
            result = list(self._gh.search_users(q))
            print q, result
            if len(result) == 1:
                gh_user = result[0].user.login
                self._users[email] = gh_user
            else:
                not_mapped_users.append(email)
            self.wait_rate_limits()
        return not_mapped_users

    def wait_rate_limits(self):
        # Crude throttle: if any resource is nearly exhausted, sleep a minute.
        for k, v in self._gh.rate_limit()['resources'].items():
            if v['remaining'] < 2:
                print("waiting one minute for rate limiting reasons..", k)
                time.sleep(60)

    def get_user(self, user):
        """ transform the given id to a github username if it's an public e-mail
        cache results
        take into account provided mapping """
        if user is None:
            return user
        gh_user = self._users.get(user, None)
        if gh_user is None and not self.only_from_cache:
            gh_user = self._mapping.get(user, user)
            if gh_user.find('@') > 0:
                result = list(
                    self._gh.search_users('{} in:email'.format(gh_user)))
                if len(result) == 1:
                    gh_user = '******'.format(result[0].user.login)
            self._users[user] = gh_user
        return gh_user

    def _user_display(self, user):
        # Render a user id for Markdown output; '@'-prefixed values mention
        # the GitHub account, e-mails are reduced to their local part.
        gh_user = self.get_user(user)
        if not gh_user:
            gh_user = "******"
        if gh_user[0] == '@':
            display_user = gh_user  # this will result in a mention
        else:
            parts = gh_user.split('@')
            assert len(parts) in (1, 2), 'Special case, needs handling'
            if len(parts) == 2:
                # only first part of the e-mail
                display_user = '******'.format(parts[0])
            else:
                # use as is
                display_user = '******'.format(gh_user)
        return display_user

    def _convert_contributors(self, contributors):
        """ represent the list of contributors in Markdown """
        result = list()
        for user, contributions in contributors.items():
            display_user = self._user_display(self.get_user(user))
            print display_user, contributions
            result.append(display_user)
        return ', '.join(result)

    def _format_comments(self, comments):
        # Join ticket comments into one Markdown string, separated by rules;
        # retarget-noise comments are dropped.
        comments_text = []
        for comment in comments:
            if comment.get('message'):
                if "Ticket retargeted after milestone closed" not in comment['message']:
                    text = ""
                    text += "Comment from: " + self._user_display(
                        self.get_user(comment['author'])) + "\n"
                    # NOTE(review): passing the message body through
                    # get_user() looks unintended (get_user expects a user
                    # id) — verify; convert_text(comment['message']) was
                    # probably meant.
                    text += convert_text(self.get_user(comment['message']))
                    comments_text.append(text)
        return "\n---\n".join(comments_text)

    def create_issue(self, ticket):
        """ create an issue in the given project

        Returns (issue, issue_url), or (None, None) in dry-run mode.
        """
        assert isinstance(ticket, dict)
        if self._dry_run:
            return None, None
        res = self._repo.create_issue(
            ticket['summary'],
            body=self._template.format(
                trac_id=ticket['id'],
                trac_url=ticket['url'],
                users=self._convert_contributors(ticket['contributors']),
                body=ticket['description'],
                creation_date=format_date(ticket['time']),
                modification_date=format_date(ticket['changetime']),
                comments=self._format_comments(ticket['comments'])),
            milestone=self.ensure_milestone(ticket['milestone']))
        return res, res.html_url