def get_cached_request(self, url):
    '''Use a combination of sqlite and ondisk caching to GET an api resource'''
    url_parts = url.split('/')
    cdf = os.path.join(self.cached_requests_dir, url.replace('https://', '') + '.json.gz')
    cdd = os.path.dirname(cdf)
    if not os.path.exists(cdd):
        os.makedirs(cdd)

    # FIXME - commits are static and can always be used from cache.
    if url_parts[-2] == 'commits' and os.path.exists(cdf):
        return read_gzip_json_file(cdf)

    headers = {
        u'Accept': u','.join(self.accepts_headers),
        u'Authorization': u'Bearer %s' % self.token,
    }

    meta = ADB.get_github_api_request_meta(url, token=self.token)
    if meta is None:
        meta = {}

    # https://developer.github.com/v3/#conditional-requests
    etag = meta.get('etag')
    if etag and os.path.exists(cdf):
        headers['If-None-Match'] = etag

    rr = requests.get(url, headers=headers)

    if rr.status_code == 304:
        # not modified; the cache file is gzip-compressed JSON, so it must be
        # read with the gzip helper rather than a plain open()
        data = read_gzip_json_file(cdf)
    else:
        data = rr.json()

    # handle ratelimits ...
    if isinstance(data, dict) and data.get(u'message'):
        if data[u'message'].lower().startswith(u'api rate limit exceeded'):
            raise RateLimitError()

    # cache data to disk
    logging.debug('write %s' % cdf)
    write_gzip_json_file(cdf, data)

    # save the meta
    ADB.set_github_api_request_meta(url, rr.headers, cdf, token=self.token)

    # pagination: recurse through the cached getter so every page gets the
    # same etag/on-disk caching treatment
    if hasattr(rr, u'links') and rr.links and rr.links.get(u'next'):
        _data = self.get_cached_request(rr.links[u'next'][u'url'])
        if isinstance(data, list):
            data += _data
        else:
            data.update(_data)

    return data
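# The gzip-JSON helpers called above are defined elsewhere in the codebase.
# A minimal sketch of what they are assumed to do, with names and signatures
# inferred only from the call sites (an assumption, not the actual source):

import gzip
import json


def read_gzip_json_file(path):
    '''Deserialize JSON from a gzip-compressed file.'''
    with gzip.open(path, 'rt') as f:
        return json.loads(f.read())


def write_gzip_json_file(path, data):
    '''Serialize data to JSON and store it gzip-compressed.'''
    with gzip.open(path, 'wt') as f:
        f.write(json.dumps(data))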
def _get_url(self, url, usecache=False, timeout=TIMEOUT):
    cdir = os.path.join(self.cachedir, u'.raw')
    if not os.path.isdir(cdir):
        os.makedirs(cdir)
    cfile = url.replace(SHIPPABLE_URL + '/', u'')
    cfile = cfile.replace(u'/', u'_')
    cfile = os.path.join(cdir, cfile + u'.json')
    gzfile = cfile + u'.gz'

    # transparently compress old logs
    if os.path.isfile(cfile) and not os.path.isfile(gzfile):
        compress_gzip_file(cfile, gzfile)

    # attempt to load a cached [status_code, json] pair from disk
    rc = None
    jdata = None
    if os.path.isfile(gzfile):
        try:
            fdata = read_gzip_json_file(gzfile)
            rc = fdata[0]
            jdata = fdata[1]
        except ValueError:
            pass

        if rc == 400:
            return None

    # always use cache for finished jobs...
    is_finished = False
    if isinstance(jdata, list):
        ts = [x.get('endedAt') for x in jdata]
        if None not in ts:
            is_finished = True
    elif isinstance(jdata, dict) and jdata.get(u'endedAt'):
        is_finished = True

    resp = None
    if not os.path.isfile(gzfile) or not jdata or (not usecache and not is_finished):
        if os.path.isfile(gzfile):
            logging.error(gzfile)
        resp = fetch(url, headers=HEADERS, timeout=timeout)
        if not resp:
            return None

        if resp.status_code != 400:
            jdata = resp.json()
            write_gzip_json_file(gzfile, [resp.status_code, jdata])
        else:
            write_gzip_json_file(gzfile, [resp.status_code, {}])
            return None

    check_response(resp)

    if not jdata:
        raise ShippableNoData()

    return jdata
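# _get_url additionally relies on a compress_gzip_file helper to migrate old
# uncompressed cache files into the .json.gz format. A plausible sketch under
# the same assumptions (whether the original removes the uncompressed source
# file afterwards is a guess):

import gzip
import os
import shutil


def compress_gzip_file(src, dst):
    '''Gzip-compress src into dst, then drop the uncompressed original.'''
    with open(src, 'rb') as f_in:
        with gzip.open(dst, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    os.remove(src)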