Example #1
# Assumed imports for this snippet; log, GitHubUserCache, NullObject and the
# SQLAlchemy session passed to getGithubUserForLogin come from the surrounding project.
import time

from github3 import GitHub, login


class GitHubDB(object):
    def __init__(self, ghtoken):
        # Get handle to Github API
        if ghtoken is not None and ghtoken != '':
            self.gh = login(token=ghtoken)
        else:
            log.warning('Using unauthenticated access to Github API. This will result in severe rate limiting.')
            self.gh = GitHub()

    def waitForRateLimit(self, resourceType):
        """resourceType can be 'search' or 'core'."""
        try:
            rateLimitInfo = self.gh.rate_limit()['resources']
            while rateLimitInfo[resourceType]['remaining'] < (1 if resourceType == 'search' else 12):
                waitTime = max(1, rateLimitInfo[resourceType]['reset'] - time.time())
                log.warning('Waiting %s seconds for Github rate limit...', waitTime)
                time.sleep(waitTime)
                rateLimitInfo = self.gh.rate_limit()['resources']
        except ConnectionError as e:
            log.error("Connection error while querying GitHub rate limit. Retrying...")
            self.waitForRateLimit(resourceType)

    def refreshGithubUser(self, ghUserObject):
        self.waitForRateLimit('core')
        return ghUserObject.refresh(True)

    def getGithubUserForLogin(self, login, session):
        """Uses the Github API to find the user for the given username. Returns NullObject if the user was not found for any reason."""
        # Try to use cached result to avoid hitting rate limit
        cachedUser = session.query(GitHubUserCache).filter(GitHubUserCache.login == login).first()
        if cachedUser is not None:
            return cachedUser if not cachedUser.fake else NullObject()
        log.debug('Querying GitHub API for login %s', login)
        try:
            self.waitForRateLimit('core')
            potentialUser = self.gh.user(login)
            if potentialUser is None:
                # store login as fake
                session.add(GitHubUserCache(login=login, fake=True))
                return NullObject()
            actualUser = self.refreshGithubUser(potentialUser)
            if isinstance(potentialUser, NullObject):
                # store login as fake
                session.add(GitHubUserCache(login=login, fake=True))
            else:
                # cache user
                session.add(GitHubUserCache(login=login, name=actualUser.name, email=actualUser.email, company=actualUser.company, location=actualUser.location))
            return actualUser
        except ConnectionError:
            log.error("github query failed when attempting to verify username %s", login)
            return NullObject()

    def searchGithubUsers(self, query):
        self.waitForRateLimit('search')
        return self.gh.search_users(query)
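
A minimal usage sketch for the class above, assuming the surrounding project supplies the SQLAlchemy session and the GitHubUserCache / NullObject types; the token, the make_session helper and the login below are placeholders:

db = GitHubDB(ghtoken='<personal access token>')      # placeholder token
session = make_session()                               # hypothetical helper returning the project's SQLAlchemy session
user = db.getGithubUserForLogin('octocat', session)
if not isinstance(user, NullObject):
    print(user.name, user.email)
session.commit()                                       # persist any GitHubUserCache rows added above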
Example #2
# Assumed imports for this snippet; dump, load, convert_text and format_date
# come from the surrounding migration script.
import os
import time

from github3 import GitHub


class GitHubAdaptor(object):
    """
    Thin wrapper over github3 for importing [trac] tickets.
    """
    def __init__(self, config, dry_run=False, only_from_cache=False):
        self._dry_run = dry_run
        self.only_from_cache = only_from_cache
        self._mapping = config['mapping']
        self._template = config['template']

        self._gh = GitHub(token=config['token'])
        # Everything is done via _repo
        self._repo = self._gh.repository(config['owner'], config['repository'])
        self._upstream_repo = self._gh.repository(
            config['upstream_owner'], config['upstream_repository'])

        # get current set of available milestones
        self._milestones = {
            milestone.title: milestone.number
            for milestone in self._repo.iter_milestones()
        }

        self._users = dict()

        self._user_cache = config.get('user_cache', None)

        self._load_user_cache()

    def __del__(self):
        """
        save currently known user mapping
        """
        if self._user_cache is not None:
            with open(self._user_cache, 'w') as user_cache:
                dump(self._users, user_cache)

    def _load_user_cache(self):
        """
        load users that were already handled in a previous attempt
        """
        if self._user_cache is not None and os.path.isfile(self._user_cache):
            with open(self._user_cache) as user_cache:
                tempo = load(user_cache)

                assert isinstance(tempo, dict)

                self._users = tempo
                self._users.update(self._mapping)

    def ensure_milestone(self, name):
        """
        check if the given milestone is known already and, if it is not, create it
        """
        num = self._milestones.get(name, None)
        if num is None:
            milestone = self._repo.create_milestone(name)

            num = self._milestones[name] = milestone.number

        return num

    def find_user_in_commits(self, email):
        """
            find a user using the commit api.
            This helps to find more users, as the email is not always public for search api

            also this helps with rate limits on search api
        """
        if email in self._users:
            return self._users[email]

        gh_user = None
        for commit in self._upstream_repo.iter_commits(author=email, number=1):
            if commit.author is None:
                print(email, commit.commit.author, "https://github.com/buildbot/buildbot/commit/" + commit.sha)
                q = 'fullname:"{}"'.format(commit.commit.author['name'])
                result = list(self._gh.search_users(q))
                if len(result) == 1:
                    gh_user = result[0].user.login
                else:
                    print " ".join([r.user.login
                                    for r in result]), "possibilities"
                self.wait_rate_limits()
            else:
                gh_user = commit.author.login
        if gh_user is not None:
            print "found mapping for", email, ":", gh_user
            self._users[email] = gh_user
            return gh_user
        print "email not found in repositorie's authors", email
        return None

    def find_users(self, emails):
        not_mapped_users = []
        for email in emails:
            q = '{} in:email'.format(email)
            result = list(self._gh.search_users(q))
            print(q, result)
            if len(result) == 1:
                gh_user = result[0].user.login
                self._users[email] = gh_user
            else:
                not_mapped_users.append(email)
            self.wait_rate_limits()
        return not_mapped_users

    def wait_rate_limits(self):
        for k, v in self._gh.rate_limit()['resources'].items():
            if v['remaining'] < 2:
                print("waiting one minute for rate limiting reasons..", k)
                time.sleep(60)

    def get_user(self, user):
        """
        transform the given id to a github username if it's a public e-mail

        cache results
        take into account provided mapping
        """
        if user is None:
            return user

        gh_user = self._users.get(user, None)

        if gh_user is None and not self.only_from_cache:
            gh_user = self._mapping.get(user, user)

            if gh_user.find('@') > 0:
                result = list(
                    self._gh.search_users('{} in:email'.format(gh_user)))
                if len(result) == 1:
                    gh_user = '@{}'.format(result[0].user.login)  # '@' marks a resolved GitHub handle (rendered as a mention later)

            self._users[user] = gh_user

        return gh_user

    def _user_display(self, user):
        gh_user = self.get_user(user)

        if not gh_user:
            gh_user = "******"

        if gh_user[0] == '@':
            display_user = gh_user  # this will result in a mention
        else:
            parts = gh_user.split('@')

            assert len(parts) in (1, 2), 'Special case, needs handling'

            if len(parts) == 2:  # only first part of the e-mail
                display_user = '{}'.format(parts[0])
            else:  # use as is
                display_user = '{}'.format(gh_user)

        return display_user

    def _convert_contributors(self, contributors):
        """
        represent the list of contributors in Markdown
        """
        result = list()

        for user, contributions in contributors.items():
            display_user = self._user_display(self.get_user(user))
            print(display_user, contributions)
            result.append(display_user)
        return ', '.join(result)

    def _format_comments(self, comments):
        comments_text = []
        for comment in comments:
            if comment.get('message'):
                if "Ticket retargeted after milestone closed" not in comment[
                        'message']:
                    text = ""
                    text += "Comment from: " + self._user_display(
                        self.get_user(comment['author'])) + "\n"
                    text += convert_text(comment['message'])
                    comments_text.append(text)
        return "\n---\n".join(comments_text)

    def create_issue(self, ticket):
        """
        create an issue in the given project
        """
        assert isinstance(ticket, dict)
        if self._dry_run:
            return None, None
        res = self._repo.create_issue(
            ticket['summary'],
            body=self._template.format(
                trac_id=ticket['id'],
                trac_url=ticket['url'],
                users=self._convert_contributors(ticket['contributors']),
                body=ticket['description'],
                creation_date=format_date(ticket['time']),
                modification_date=format_date(ticket['changetime']),
                comments=self._format_comments(ticket['comments'])),
            milestone=self.ensure_milestone(ticket['milestone']))
        return res, res.html_url
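
A hedged configuration sketch for the adaptor above; the config keys mirror those read in __init__, the ticket fields mirror those used in create_issue, and every concrete value is a placeholder. Note that the constructor already queries the API for both repositories and their milestones, while dry_run=True makes create_issue return (None, None) without creating anything:

config = {
    'token': '<personal access token>',
    'owner': 'example-org', 'repository': 'example-repo',
    'upstream_owner': 'buildbot', 'upstream_repository': 'buildbot',
    'mapping': {},
    'template': 'Imported from trac: {trac_url} ({creation_date})\n\n{body}\n\n{comments}',
    # 'user_cache' omitted so __del__ does not try to write a cache file
}
adaptor = GitHubAdaptor(config, dry_run=True)
issue, url = adaptor.create_issue({
    'id': 1234, 'url': 'https://trac.example.org/ticket/1234',
    'summary': 'Example ticket', 'description': 'Ticket body',
    'contributors': {}, 'comments': [],
    'time': 1262304000, 'changetime': 1262304000, 'milestone': '1.0',
})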
Example #3
# Assumed imports for this snippet; logger, format, send_email and
# DEFAULT_DESIRED_CANDIDATES_PER_EMAIL_DIGEST come from the surrounding module.
import json
import os
import subprocess

import redis
from github3 import GitHub


def getGitHubProfiles(locations, languages, num):
    logger.info("Locations: {0}".format(locations))
    logger.info("Languages: {0}".format(languages))

    num = int(num) if num else DEFAULT_DESIRED_CANDIDATES_PER_EMAIL_DIGEST
    logger.info("Number of Profiles requested: {0}".format(num))

    logger.info("Building query string")
    queryString = ''
    for location in locations:
        queryString = queryString + 'location:\"' + location + '\" '

    for language in languages:
        queryString = queryString + 'language:\"' + language + '\" '

    queryString = queryString + 'type:User'
    logger.info("Query String = {}".format(queryString))

    logger.info("Connecting to Github")
    gh = GitHub(token=os.environ['TOKEN'])

    logger.info("Getting a list of matching users using GitHub API")
    matchingUsers = []
    for userSearchResult in gh.search_users(queryString):
        matchingUsers.append(userSearchResult.user)

    logger.info("Number of matching profiles: {}".format(len(matchingUsers)))

    userActivityDict = {}

    logger.info(
        "Using githubcontributions api to get the number of contributions for each user"
    )

    # TODO: Remove the top 25 when ready
    for u in matchingUsers[:25]:

        cmd = 'curl -s https://githubcontributions.io/api/user/' + u.login
        output = subprocess.check_output(cmd, shell=True)
        userActivityDict[u.login] = json.loads(output)['eventCount']

    logger.info("Sorting the profiles based on # of contributions")

    topUsers = sorted(userActivityDict.items(),
                      key=lambda x: x[1],
                      reverse=True)

    logger.info(
        "Emailing top {} profiles not already in the cache (not already sent before)"
        .format(num))
    r = redis.StrictRedis(host='localhost', port=6379, db=0)

    format.initialize(num)

    # TODO Run the following when done debugging, to clear the cache
    # redis-cli flushall
    count = 0
    for u in topUsers:
        if count < num:
            usr = gh.user(u[0])
            contributions = u[1]

            # Skip profiles already e-mailed (cached in Redis) and HookLogic employees.
            if not r.exists(usr.login) and (usr.company is None
                                            or ('HookLogic' not in usr.company
                                                and 'Hooklogic' not in usr.company)):

                # Query StackExchange for User id
                cmd = 'curl -s http://data.stackexchange.com/stackoverflow/csv/670133?Name=' + usr.login
                output = subprocess.check_output(cmd, shell=True)
                user_id = ''
                user_id = output.split('\n')[1].replace('\"', '')
                stackoverflow_url = "http://stackoverflow.com/users/" + user_id + "/" + usr.login
                format.format_html(usr, contributions,
                                   stackoverflow_url if user_id else '')
                r.set(usr.login, True)
                count = count + 1

    format.save_file()

    send_email.send()
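
A minimal invocation sketch for the function above; the locations, languages and count are illustrative, and it assumes a GitHub token in the TOKEN environment variable plus a local Redis server on port 6379, exactly as the function itself does:

os.environ.setdefault('TOKEN', '<personal access token>')  # placeholder token
getGitHubProfiles(['Berlin', 'Hamburg'], ['Python'], 5)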