class GitHubDB(object):
    """Rate-limit-aware access to the GitHub API with a user lookup cache.

    Every call that hits the API first waits until enough requests remain
    in the relevant rate-limit bucket ('core' or 'search').
    """

    # Minimum number of remaining requests required before a call proceeds.
    # NOTE(review): the larger non-search headroom presumably covers
    # operations that issue several requests in a row - confirm.
    _SEARCH_MIN_REMAINING = 1
    _DEFAULT_MIN_REMAINING = 12
    # Pause between retries after a connection error, so a network outage
    # does not turn into a tight request loop.
    _RETRY_DELAY_SECONDS = 1

    def __init__(self, ghtoken):
        """Create the API handle, authenticated when a non-empty token is given."""
        if ghtoken is not None and ghtoken != '':
            self.gh = login(token=ghtoken)
        else:
            log.warning('Using unauthenticated access to Github API. This will result in severe rate limiting.')
            self.gh = GitHub()

    def waitForRateLimit(self, resourceType):
        """Block until the rate limit for resourceType allows another call.

        resourceType can be 'search' or 'core'. Connection errors while
        querying the limit are logged and retried indefinitely; retries are
        done in a loop (not by recursion) so a long outage cannot exhaust
        the interpreter's recursion limit.
        """
        if resourceType == 'search':
            minRemaining = self._SEARCH_MIN_REMAINING
        else:
            minRemaining = self._DEFAULT_MIN_REMAINING
        while True:
            try:
                limits = self.gh.rate_limit()['resources'][resourceType]
                if limits['remaining'] >= minRemaining:
                    return
                # Sleep until the advertised reset time, but at least one second.
                waitTime = max(1, limits['reset'] - time.time())
                log.warning('Waiting %s seconds for Github rate limit...', waitTime)
                time.sleep(waitTime)
            except ConnectionError:
                log.error("Connection error while querying GitHub rate limit. Retrying...")
                time.sleep(self._RETRY_DELAY_SECONDS)

    def refreshGithubUser(self, ghUserObject):
        """Wait for the 'core' rate limit, then return ghUserObject.refresh(True)."""
        self.waitForRateLimit('core')
        return ghUserObject.refresh(True)

    def getGithubUserForLogin(self, login, session):
        """Uses the Github API to find the user for the given username.

        Returns NullObject if the user was not found for any reason.
        Results (including misses, stored as ``fake`` entries) are added to
        ``session`` so later lookups can be answered from the cache; the
        session is not committed here.
        """
        # Try to use cached result to avoid hitting rate limit
        cachedUser = session.query(GitHubUserCache).filter(GitHubUserCache.login == login).first()
        if cachedUser is not None:
            return NullObject() if cachedUser.fake else cachedUser
        log.debug('Querying GitHub API for login %s', login)
        try:
            self.waitForRateLimit('core')
            potentialUser = self.gh.user(login)
            # Detect a miss *before* refreshing, so no rate-limited request
            # is spent on a user that does not exist.
            if potentialUser is None or isinstance(potentialUser, NullObject):
                session.add(GitHubUserCache(login=login, fake=True))
                return NullObject()
            actualUser = self.refreshGithubUser(potentialUser)
            if isinstance(actualUser, NullObject):
                # The refresh itself came back empty: store login as fake.
                session.add(GitHubUserCache(login=login, fake=True))
            else:
                # cache user
                session.add(GitHubUserCache(login=login, name=actualUser.name, email=actualUser.email, company=actualUser.company, location=actualUser.location))
            return actualUser
        except ConnectionError:
            log.error("github query failed when attempting to verify username %s", login)
            return NullObject()

    def searchGithubUsers(self, query):
        """Wait for the 'search' rate limit, then return the user search results."""
        self.waitForRateLimit('search')
        return self.gh.search_users(query)
Exemple #2
0
def is_github_api_limit_reached(e: github3.GitHubError,
                                gh: github3.GitHub) -> bool:
    """Prints diagnostic information about a github exception.

    The exception, its response and the response URL are printed; a missing
    ``response`` attribute is printed as ``None`` instead of raising, so the
    diagnostic helper cannot itself fail on an unexpected exception type.

    Parameters
    ----------
    e
        The exception to report.
    gh
        Client used to query the current rate limit.

    Returns
    -------
    out_of_api_credits
        A flag to indicate that the api limit has been exhausted
    """
    response = getattr(e, "response", None)
    print(e)
    print(response)
    print(getattr(response, "url", None))

    try:
        core = gh.rate_limit()["resources"]["core"]
    except Exception:
        # if we can't connect to the rate limit API, let's assume it has been reached
        return True
    if core["remaining"] != 0:
        return False

    # Timezone-aware conversion; datetime.utcfromtimestamp is deprecated.
    # The formatted text is unchanged.
    reset_at = datetime.datetime.fromtimestamp(
        core["reset"], tz=datetime.timezone.utc)
    print("API timeout, API returns at")
    print(reset_at.strftime("%Y-%m-%dT%H:%M:%SZ"))
    return True
Exemple #3
0
def _get_files(owner, repo, sha, tokens):
    """Return the file paths of ``owner/repo`` at commit ``sha``.

    Only the first entry of ``tokens`` is used to authenticate. The
    remaining API quota is logged at debug level before returning.
    """
    # TODO: use other tokens if first fails
    client = GitHub(token=tokens[0])
    repository = client.repository(owner, repo)

    # The Tree API is tried first; a None result means the tree was
    # truncated, in which case the Contents API is used instead.
    paths = _get_files_tree_api(repository, sha)
    if paths is None:
        paths = _get_files_contents_api(repository, sha)

    calls_left = client.rate_limit()['rate']['remaining']
    log.debug('Remaining GitHub API calls: %s', calls_left)
    return paths
Exemple #4
0
def get_session() -> GitHub:
    """Return the shared GitHub client, creating it on first use.

    On creation the client is built from ``CONFIG['github_token']``, the
    current rate-limit status is printed, and the client is stored in the
    module-level ``GITHUB_SESSION`` so later calls reuse it.
    """
    global GITHUB_SESSION

    if GITHUB_SESSION is None:
        token = CONFIG['github_token']

        # Increase read timeout for creating PRs with long bodies.
        http_session = github3.session.GitHubSession(default_read_timeout=30)
        client = GitHub(token=token, session=http_session)

        quota = client.rate_limit()['rate']
        limit, remaining = quota['limit'], quota['remaining']
        reset = datetime.utcfromtimestamp(quota['reset'])
        print(f"{remaining}/{limit} rate limit remaining")
        print(f"Reset at {reset} UTC (in {reset - datetime.utcnow()})")

        GITHUB_SESSION = client

    return GITHUB_SESSION
Exemple #5
0
class GitHubAdaptor(object):
    """
    thin wrapper over github3 with the purpose of importing [trac] tickets

    Diagnostic output uses single-argument ``print(...)`` calls, which behave
    the same under Python 2 and Python 3.
    """
    def __init__(self, config, dry_run=False, only_from_cache=False):
        """
        connect to GitHub and load the known milestones and user mapping

        ``config`` must provide 'mapping', 'template', 'token', 'owner',
        'repository', 'upstream_owner' and 'upstream_repository';
        'user_cache' (a file path) is optional.
        With ``dry_run`` set, ``create_issue`` does nothing.
        With ``only_from_cache`` set, ``get_user`` never queries GitHub.
        """
        self._dry_run = dry_run
        self.only_from_cache = only_from_cache
        self._mapping = config['mapping']
        self._template = config['template']

        self._gh = GitHub(token=config['token'])
        # Everything is done via _repo
        self._repo = self._gh.repository(config['owner'], config['repository'])
        self._upstream_repo = self._gh.repository(
            config['upstream_owner'], config['upstream_repository'])

        # get current set of available milestones
        self._milestones = {
            milestone.title: milestone.number
            for milestone in self._repo.iter_milestones()
        }

        self._users = {}

        self._user_cache = config.get('user_cache', None)

        self._load_user_cache()

    def __del__(self):
        """
        save currently known user mapping
        """
        # __del__ also runs when __init__ failed part-way, in which case
        # _user_cache may never have been assigned.
        cache_path = getattr(self, '_user_cache', None)
        if cache_path is not None:
            with open(cache_path, 'w') as user_cache:
                dump(self._users, user_cache)

    def _load_user_cache(self):
        """
        load users that are already handled in a previous attempt

        the configured mapping is applied on top of the loaded entries
        """
        if self._user_cache is not None and os.path.isfile(self._user_cache):
            with open(self._user_cache) as user_cache:
                tempo = load(user_cache)

                assert isinstance(tempo, dict)

                self._users = tempo
                self._users.update(self._mapping)

    def ensure_milestone(self, name):
        """
        check if the given milestone is known already and if it's not create it

        returns the milestone number
        """
        num = self._milestones.get(name, None)
        if num is None:
            milestone = self._repo.create_milestone(name)

            num = self._milestones[name] = milestone.number

        return num

    def find_user_in_commits(self, email):
        """
            find a user using the commit api.
            This helps to find more users, as the email is not always public for search api

            also this helps with rate limits on search api

            returns the GitHub login, or None when no mapping was found
        """
        if email in self._users:
            return self._users[email]

        gh_user = None
        for commit in self._upstream_repo.iter_commits(author=email, number=1):
            if commit.author is None:
                # The commit is not linked to an account: fall back to a
                # search by the author's full name.
                print("%s %s %s" % (
                    email, commit.commit.author,
                    "https://github.com/buildbot/buildbot/commit/" + commit.sha))
                q = 'fullname:"{}"'.format(commit.commit.author['name'])
                result = list(self._gh.search_users(q))
                if len(result) == 1:
                    gh_user = result[0].user.login
                else:
                    print("%s possibilities" % (
                        " ".join([r.user.login for r in result]),))
                self.wait_rate_limits()
            else:
                gh_user = commit.author.login
        if gh_user is not None:
            print("found mapping for %s : %s" % (email, gh_user))
            self._users[email] = gh_user
            return gh_user
        print("email not found in repositorie's authors %s" % (email,))
        return None

    def find_users(self, emails):
        """
        search GitHub for each e-mail and remember unambiguous matches

        returns the list of e-mails that could not be mapped to one user
        """
        not_mapped_users = []
        for email in emails:
            q = '{} in:email'.format(email)
            result = list(self._gh.search_users(q))
            print("%s %s" % (q, result))
            if len(result) == 1:
                gh_user = result[0].user.login
                self._users[email] = gh_user
            else:
                not_mapped_users.append(email)
            self.wait_rate_limits()
        return not_mapped_users

    def wait_rate_limits(self):
        """
        sleep one minute for every rate-limit resource that is almost used up
        """
        for k, v in self._gh.rate_limit()['resources'].items():
            if v['remaining'] < 2:
                print("waiting one minute for rate limiting reasons.. %s" % (k,))
                time.sleep(60)

    def get_user(self, user):
        """
        transform the given id to a github username if it's an public e-mail

        cache results
        take into account provided mapping
        """
        if user is None:
            return user

        gh_user = self._users.get(user, None)

        if gh_user is None and not self.only_from_cache:
            gh_user = self._mapping.get(user, user)

            # '@' somewhere after the first character: treat as an e-mail
            if gh_user.find('@') > 0:
                result = list(
                    self._gh.search_users('{} in:email'.format(gh_user)))
                if len(result) == 1:
                    # NOTE(review): this literal has no replacement field, so
                    # format() drops the login. It looks redacted - confirm
                    # the intended template.
                    gh_user = '******'.format(result[0].user.login)

            self._users[user] = gh_user

        return gh_user

    def _user_display(self, user):
        """
        return the Markdown text used to show the given user

        the user is resolved through ``get_user`` here, so callers pass the
        raw id and must not resolve it themselves
        """
        gh_user = self.get_user(user)

        if not gh_user:
            # NOTE(review): placeholder literal looks redacted - confirm.
            gh_user = "******"

        if gh_user.startswith('@'):
            display_user = gh_user  # this will result in a mention
        else:
            parts = gh_user.split('@')

            assert len(parts) in (1, 2), 'Special case, needs handling'

            # NOTE(review): both literals below have no replacement field
            # (they look redacted), so the formatted argument is dropped.
            if len(parts) == 2:  # only first part of the e-mail
                display_user = '******'.format(parts[0])
            else:  # use as is
                display_user = '******'.format(gh_user)

        return display_user

    def _convert_contributors(self, contributors):
        """
        represent the list of contributors in Markdown
        """
        result = []

        for user, contributions in contributors.items():
            # _user_display resolves the user itself. Resolving it twice
            # looked up the *resolved* name again, which loses cached
            # entries when only_from_cache is set.
            display_user = self._user_display(user)
            print("%s %s" % (display_user, contributions))
            result.append(display_user)
        return ', '.join(result)

    def _format_comments(self, comments):
        """
        render the ticket comments as one Markdown text

        comments without a message and automatic "retargeted" notices are
        skipped
        """
        comments_text = []
        for comment in comments:
            message = comment.get('message')
            if not message:
                continue
            if "Ticket retargeted after milestone closed" in message:
                continue
            author = self._user_display(comment['author'])
            # The message is converted as is. It must not go through
            # get_user, which would cache the text as if it were a user id.
            comments_text.append(
                "Comment from: " + author + "\n" + convert_text(message))
        return "\n---\n".join(comments_text)

    def create_issue(self, ticket):
        """
        create an issue in the given project

        returns ``(issue, html_url)``; both are None in dry-run mode, and
        the url is None when the API returned no issue
        """
        assert isinstance(ticket, dict)
        if self._dry_run:
            return None, None
        res = self._repo.create_issue(
            ticket['summary'],
            body=self._template.format(
                trac_id=ticket['id'],
                trac_url=ticket['url'],
                users=self._convert_contributors(ticket['contributors']),
                body=ticket['description'],
                creation_date=format_date(ticket['time']),
                modification_date=format_date(ticket['changetime']),
                comments=self._format_comments(ticket['comments'])),
            milestone=self.ensure_milestone(ticket['milestone']))
        if res is None:
            # Creation failed: report it the same way as the dry run does,
            # instead of raising AttributeError on None.
            return None, None
        return res, res.html_url