Example 1
    def get_cached_request(self, url):
        '''Use a combination of sqlite and ondisk caching to GET an api resource'''

        url_parts = url.split('/')

        cdf = os.path.join(self.cached_requests_dir,
                           url.replace('https://', '') + '.json.gz')
        cdd = os.path.dirname(cdf)
        if not os.path.exists(cdd):
            os.makedirs(cdd)

        # FIXME - commits are static and can always be used from cache.
        if url_parts[-2] == 'commits' and os.path.exists(cdf):
            return read_gzip_json_file(cdf)

        headers = {
            u'Accept': u','.join(self.accepts_headers),
            u'Authorization': u'Bearer %s' % self.token,
        }

        meta = ADB.get_github_api_request_meta(url, token=self.token)
        if meta is None:
            meta = {}

        # https://developer.github.com/v3/#conditional-requests
        etag = meta.get('etag')
        if etag and os.path.exists(cdf):
            headers['If-None-Match'] = etag

        rr = requests.get(url, headers=headers)

        if rr.status_code == 304:
            # not modified; the gzipped cache file written earlier is still current
            data = read_gzip_json_file(cdf)
        else:
            data = rr.json()

            # handle ratelimits ...
            if isinstance(data, dict) and data.get(u'message'):
                if data[u'message'].lower().startswith(
                        u'api rate limit exceeded'):
                    raise RateLimitError()

            # cache data to disk
            logging.debug('write %s', cdf)
            write_gzip_json_file(cdf, data)

        # save the meta
        ADB.set_github_api_request_meta(url, rr.headers, cdf, token=self.token)

        # pagination
        if hasattr(rr, u'links') and rr.links and rr.links.get(u'next'):
            _data = self.get_request(rr.links[u'next'][u'url'])
            if isinstance(data, list):
                data += _data
            else:
                data.update(_data)

        return data
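
Both examples rely on the gzip JSON helpers read_gzip_json_file and write_gzip_json_file, whose implementations are not shown here. A minimal Python 3 sketch of what they could look like, assuming the cache files hold plain gzip-compressed UTF-8 JSON:

    import gzip
    import json

    def read_gzip_json_file(path):
        # Decompress a .json.gz cache file and parse its JSON payload.
        with gzip.open(path, 'rt', encoding='utf-8') as f:
            return json.load(f)

    def write_gzip_json_file(path, data):
        # Serialize data to JSON and write it gzip-compressed to disk.
        with gzip.open(path, 'wt', encoding='utf-8') as f:
            json.dump(data, f)
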
Example 2
    def _get_url(self, url, usecache=False, timeout=TIMEOUT):
        cdir = os.path.join(self.cachedir, u'.raw')
        if not os.path.isdir(cdir):
            os.makedirs(cdir)
        cfile = url.replace(SHIPPABLE_URL + '/', u'')
        cfile = cfile.replace(u'/', u'_')
        cfile = os.path.join(cdir, cfile + u'.json')
        gzfile = cfile + u'.gz'

        # transparently compress old logs
        if os.path.isfile(cfile) and not os.path.isfile(gzfile):
            compress_gzip_file(cfile, gzfile)

        rc = None
        jdata = None
        if os.path.isfile(gzfile):
            try:
                fdata = read_gzip_json_file(gzfile)
                rc = fdata[0]
                jdata = fdata[1]
            except ValueError:
                pass

            if rc == 400:
                return None

        # always use cache for finished jobs...
        is_finished = False
        if isinstance(jdata, list):
            ts = [x.get('endedAt') for x in jdata]
            if None not in ts:
                is_finished = True
        elif isinstance(jdata, dict) and jdata.get(u'endedAt'):
            is_finished = True

        resp = None
        if (not os.path.isfile(gzfile) or not jdata
                or (not usecache and not is_finished)):
            if os.path.isfile(gzfile):
                logging.error(gzfile)

            resp = fetch(url, headers=HEADERS, timeout=timeout)
            if not resp:
                return None

            if resp.status_code != 400:
                jdata = resp.json()
                write_gzip_json_file(gzfile, [resp.status_code, jdata])
            else:
                write_gzip_json_file(gzfile, [resp.status_code, {}])
                return None

        check_response(resp)

        if not jdata:
            raise ShippableNoData

        return jdata
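
Example 2 also calls compress_gzip_file to transparently migrate older uncompressed cache files; its implementation is likewise not shown. A minimal sketch, assuming it simply gzips the source file; removing the uncompressed original afterwards is an assumption, since the caller only reads the .gz copy from then on:

    import gzip
    import os
    import shutil

    def compress_gzip_file(src, dest):
        # Stream the plain JSON cache file into a gzip container.
        with open(src, 'rb') as f_in, gzip.open(dest, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        # Assumption: drop the uncompressed original once the .gz copy exists.
        os.remove(src)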