Example 1
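The snippets on this page are excerpts from a Tornado-based service. They assume roughly the following imports and module-level constants; exact module paths and constant values are assumptions, and GitHubIssuesFilter, GitHubRepositoryDateFilter, parse_link_header, and the account objects are project-local code not shown here:

    import json
    from datetime import timedelta

    from tornado import gen
    from tornado.httpclient import AsyncHTTPClient
    from tornado.locks import BoundedSemaphore  # Tornado >= 4.2; older code used the toro package
    from tornado.log import app_log
    from tornado.queues import Queue            # Tornado >= 4.2; ditto
    from tornado.web import HTTPError

    FETCH_CONCURRENCY = 10   # assumed values; the originals are not shown on this page
    MAXIMUM_REQ_TIME = 300   # seconds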
    @classmethod
    @gen.coroutine
    def get_data(cls, account, source_filter, limit=100, skip=0):
        """
        Gathers issue information from GH
        GET https://api.github.com/repos/:owner/:repo/issues
        Header: Accept: application/vnd.github.v3+json
        """
        if not account or not account.enabled:
            raise ValueError(
                'cannot gather information without a valid account')
        client = AsyncHTTPClient()

        source_filter = GitHubIssuesFilter(source_filter)

        if source_filter.repository is None:
            raise ValueError('required parameter repository missing')
        app_log.info("Starting retrieval of issues for {}".format(account._id))

        default_headers = {
            "Content-Type": "application/json",
            "Accept": "application/vnd.github.v3+json"
        }

        page_size = limit if limit is not None and limit <= 100 else 100
        taken = 0

        uri = "https://api.github.com/repos/{}/issues?per_page={}&{}".format(
            source_filter.repository, page_size, source_filter.get_qs())
        uri = uri.rstrip('&')  # remove trailing & in case filter has no QS elements

        cls.write('[')
        count = 0

        while uri is not None:
            app_log.info(
                "({}) Retrieving next page, received {} issues thus far".
                format(account._id, taken))
            req = account.get_request(uri, headers=default_headers)
            response = yield client.fetch(req)

            page_data = json.loads(response.body.decode('utf-8'))
            taken += len(page_data)  # track total received for the progress log above

            for issue in page_data:
                if limit is not None and count >= limit:
                    break  # don't emit more issues than the requested limit
                if count > 0:
                    cls.write(',')
                cls.write(cls.format_data_to_schema(issue))
                count += 1

            if limit is None or count < limit:
                # parse the Link header from GitHub (https://developer.github.com/v3/#pagination)
                links = parse_link_header(response.headers.get('Link', ''))
                uri = links.get('next', None)
            else:
                break

        cls.write(']')

        app_log.info(
            "[GitHub] Finished retrieving {} issues for repository {}".format(
                count, source_filter.repository))
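All of these examples paginate by following GitHub's Link response header through a project-local parse_link_header helper that is not shown on this page. A minimal sketch of what such a helper might look like (the function name and the dict-of-rels return shape are inferred from the call sites, not taken from the project):

    import re

    def parse_link_header(header):
        """Parse an RFC 5988 Link header, e.g.
        '<https://api.github.com/...&page=2>; rel="next", <...>; rel="last"',
        into a dict mapping rel names ('next', 'last', ...) to URIs."""
        links = {}
        if not header:
            return links
        for part in header.split(','):
            match = re.match(r'\s*<([^>]+)>;\s*rel="([^"]+)"', part)
            if match:
                uri, rel = match.groups()
                links[rel] = uri
        return links

With that in place, parse_link_header(response.headers.get('Link', '')).get('next') is the next page's URL, or None on the last page, which is exactly how the while loops above terminate.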
Example 2
    @gen.coroutine
    def get_milestone_options(account, repository, **kwargs):
        """
        Gathers milestone options for a GitHub repository
        GET https://api.github.com/repos/:owner/:repo/milestones
        """
        client = AsyncHTTPClient()
        # note: the GitHub page-size parameter is per_page, not page_size
        uri = "https://api.github.com/repos/{}/milestones?per_page=100".format(repository)
        data = []
        while uri is not None:
            req = account.get_request(uri)
            response = yield client.fetch(req)
            response_object = json.loads(response.body.decode('utf-8'))
            data += response_object
            links = parse_link_header(response.headers.get('Link', ''))
            uri = links.get('next', None)
        return [{"title": "#{} - {}".format(milestone['number'], milestone['title']),
                 "value": milestone['number']}
                for milestone in data]
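Every example builds its requests through account.get_request, which is also project-local. A plausible shape for that helper, purely as a sketch (the access_token attribute and User-Agent string are invented for illustration):

    from tornado.httpclient import HTTPRequest

    class Account(object):
        # ...persistence fields elided...

        def get_request(self, uri, headers=None):
            """Wrap a URI in a Tornado HTTPRequest carrying the account's credentials."""
            headers = dict(headers or {})
            headers.setdefault("Authorization", "token {}".format(self.access_token))
            headers.setdefault("User-Agent", "dashboard-fetcher")  # GitHub rejects requests without a User-Agent
            return HTTPRequest(uri, headers=headers)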
Example 3
    @gen.coroutine
    def get_repo_options(account, **kwargs):
        """
        Gathers repository options for GitHub commits source

        GET https://api.github.com/user/repos
        Special Accept header required: application/vnd.github.moondragon+json
        """
        client = AsyncHTTPClient()
        uri = "https://api.github.com/user/repos?per_page=100"
        data = []
        while uri is not None:
            req = account.get_request(uri, headers={"Accept": "application/vnd.github.moondragon+json"})
            response = yield client.fetch(req)
            response_object = json.loads(response.body.decode('utf-8'))
            data += response_object
            links = parse_link_header(response.headers.get('Link', ''))
            uri = links.get('next', None)
        return [{"title": repo['full_name'], "value": repo['full_name']}
                for repo in data]
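Both option getters are coroutines, so callers must yield them. A hypothetical driver (load_form_options is invented for illustration):

    @gen.coroutine
    def load_form_options(account):
        repos = yield get_repo_options(account)
        # e.g. [{'title': 'octocat/Hello-World', 'value': 'octocat/Hello-World'}, ...]
        milestones = yield get_milestone_options(account, repos[0]['value'])
        raise gen.Return((repos, milestones))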
Example 4
    @classmethod
    @gen.coroutine
    def get_data(cls, account, source_filter, limit=100, skip=0):
        """
        Gathers commit information from GH
        GET https://api.github.com/repos/:owner/:repo/commits
        Header: Accept: application/vnd.github.v3+json
        """
        if not account or not account.enabled:
            raise ValueError('cannot gather information without a valid account')
        client = AsyncHTTPClient()

        source_filter = GitHubRepositoryDateFilter(source_filter)

        if source_filter.repository is None:
            raise ValueError('required parameter repository missing')

        default_headers = {"Content-Type": "application/json", "Accept": "application/vnd.github.v3+json"}

        # first we grab our list of commits
        uri = "https://api.github.com/repos/{}/commits".format(source_filter.repository)
        qs = source_filter.get_qs()
        if qs != '':
            uri = uri + '?' + qs
        app_log.info("Starting retrieval of commit list for account {}".format(account._id))
        # use '&' if the filter already contributed a query string
        sep = '&' if '?' in uri else '?'
        if limit is not None and limit <= 100:
            # we can handle our limit right here
            uri += "{}per_page={}".format(sep, limit)
        elif limit is None:
            uri += "{}per_page=100".format(sep)  # maximum number per page for GitHub API
        taken = 0

        queue = Queue()
        sem = BoundedSemaphore(FETCH_CONCURRENCY)
        done, working = set(), set()

        while uri is not None:
            app_log.info(
                "({}) Retrieving next page, received {} commits thus far".format(account._id, taken))
            req = account.get_request(uri, headers=default_headers)
            response = yield client.fetch(req)

            page_data = json.loads(response.body.decode('utf-8'))
            taken += len(page_data)
            for item in page_data:
                queue.put(item.get('url', None))

            if limit is None or taken < limit:
                # parse the Link header from GitHub (https://developer.github.com/v3/#pagination)
                links = parse_link_header(response.headers.get('Link', ''))
                uri = links.get('next', None)
            else:
                break

            if queue.qsize() > 500:
                raise HTTPError(413, 'too many commits')
        app_log.info("({}) Commit list retrieved, fetching info for {} commits".format(account._id, taken))

        # open our list
        cls.write('[')

        # our worker to actually fetch the info
        @gen.coroutine
        def fetch_url():
            current_url = yield queue.get()
            try:
                if current_url in working:
                    return
                page_no = len(working)
                app_log.info("Fetching page {}".format(page_no))
                working.add(current_url)
                req = account.get_request(current_url)
                client = AsyncHTTPClient()
                response = yield client.fetch(req)
                response_data = json.loads(response.body.decode('utf-8'))
                obj = {
                    'date': response_data['commit']['author']['date'],
                    'author': response_data['commit']['author']['name'],
                    'added_files': len([f for f in response_data['files'] if f['status'] == 'added']),
                    'deleted_files': len([f for f in response_data['files'] if f['status'] == 'deleted']),
                    'modified_files': len([f for f in response_data['files'] if f['status'] == 'modified']),
                    'additions': response_data['stats']['additions'],
                    'deletions': response_data['stats']['deletions']
                }
                if done:
                    cls.write(',')
                cls.write(json.dumps(obj))
                done.add(current_url)
                app_log.info("Page {} downloaded".format(page_no))

            finally:
                queue.task_done()
                sem.release()

        @gen.coroutine
        def worker():
            while True:
                yield sem.acquire()
                # deliberately not yielded: the semaphore, released in
                # fetch_url's finally block, caps concurrent fetches
                fetch_url()

        # start our concurrency worker (fire-and-forget; queue.join below is
        # what we actually wait on)
        worker()
        try:
            # wait until we're done
            yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
        except gen.TimeoutError:
            app_log.warning("Request exceeds maximum time, cutting response short")
        finally:
            # close our list
            cls.write(']')
        app_log.info("Finished retrieving commits for {}".format(account._id))