def prepare_response(self, request, cached): """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ # Special case the '*' Vary value as it means we cannot actually # determine if the cached response is suitable for this request. if "*" in cached.get("vary", {}): return # Ensure that the Vary headers for the cached response match our # request for header, value in cached.get("vary", {}).items(): if request.headers.get(header, None) != value: return body_raw = cached["response"].pop("body") headers = CaseInsensitiveDict(data=cached["response"]["headers"]) if headers.get("transfer-encoding", "") == "chunked": headers.pop("transfer-encoding") cached["response"]["headers"] = headers try: body = io.BytesIO(body_raw) except TypeError: # This can happen if cachecontrol serialized to v1 format (pickle) # using Python 2. A Python 2 str(byte string) will be unpickled as # a Python 3 str (unicode string), which will cause the above to # fail with: # # TypeError: 'str' does not support the buffer interface body = io.BytesIO(body_raw.encode("utf8")) return HTTPResponse(body=body, preload_content=False, **cached["response"])
def send( self, request, # type: PreparedRequest stream=False, # type: bool timeout=None, # type: Optional[Union[float, Tuple[float, float]]] verify=True, # type: Union[bool, str] cert=None, # type: Optional[Union[str, Tuple[str, str]]] proxies=None, # type:Optional[Mapping[str, str]] ): # type: (...) -> Response pathname = url_to_path(request.url) resp = Response() resp.status_code = 200 resp.url = request.url try: stats = os.stat(pathname) except OSError as exc: resp.status_code = 404 resp.raw = exc else: modified = email.utils.formatdate(stats.st_mtime, usegmt=True) content_type = mimetypes.guess_type(pathname)[0] or "text/plain" resp.headers = CaseInsensitiveDict({ "Content-Type": content_type, "Content-Length": stats.st_size, "Last-Modified": modified, }) resp.raw = open(pathname, "rb") resp.close = resp.raw.close return resp
def dumps(self, request, response, body=None): response_headers = CaseInsensitiveDict(response.headers) if body is None: body = response.read(decode_content=False) # NOTE: 99% sure this is dead code. I'm only leaving it # here b/c I don't have a test yet to prove # it. Basically, before using # `cachecontrol.filewrapper.CallbackFileWrapper`, # this made an effort to reset the app.ini handle. The # `CallbackFileWrapper` short circuits this code by # setting the body as the content is consumed, the # result being a `body` argument is *always* passed # into cache_response, and in turn, # `Serializer.dump`. response._fp = io.BytesIO(body) data = { "response": { "body": _b64_encode_bytes(body), "headers": dict((_b64_encode(k), _b64_encode(v)) for k, v in response.headers.items()), "status": response.status, "version": response.version, "reason": _b64_encode_str(response.reason), "strict": response.strict, "decode_content": response.decode_content, }, } # Construct our vary headers data["vary"] = {} if "vary" in response_headers: varied_headers = response_headers['vary'].split(',') for header in varied_headers: header = header.strip() data["vary"][header] = request.headers.get(header, None) # Encode our Vary headers to ensure they can be serialized as JSON data["vary"] = dict( (_b64_encode(k), _b64_encode(v) if v is not None else v) for k, v in data["vary"].items()) return b",".join([ b"cc=2", zlib.compress( json.dumps( data, separators=(",", ":"), sort_keys=True, ).encode("utf8"), ), ])
def build_response(self, req, resp): """Builds a :class:`Response <requests.Response>` object from a urllib3 response. This should not be called from user code, and is only exposed for use when subclassing the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` :param req: The :class:`PreparedRequest <PreparedRequest>` used to generate the response. :param resp: The urllib3 response object. """ response = Response() # Fallback to None if there's no status_code, for whatever reason. response.status_code = getattr(resp, 'status', None) # Make headers case-insensitive. response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})) # Set encoding. response.encoding = get_encoding_from_headers(response.headers) response.raw = resp response.reason = response.raw.reason if isinstance(req.url, bytes): response.url = req.url.decode('utf-8') else: response.url = req.url # Add new cookies from the server. extract_cookies_to_jar(response.cookies, req, resp) # Give the Response some context. response.request = req response.connection = self return response
def send(self, request, stream=None, timeout=None, verify=None, cert=None, proxies=None): parsed_url = urlparse.urlparse(request.url) # We only work for requests with a host of localhost if parsed_url.netloc.lower() != "localhost": raise InvalidURL( "Invalid URL %r: Only localhost is allowed" % request.url ) real_url = urlparse.urlunparse(parsed_url[:1] + ("",) + parsed_url[2:]) pathname = url_to_path(real_url) resp = Response() resp.status_code = 200 resp.url = real_url stats = os.stat(pathname) modified = email.utils.formatdate(stats.st_mtime, usegmt=True) resp.headers = CaseInsensitiveDict({ "Content-Type": mimetypes.guess_type(pathname)[0] or "text/plain", "Content-Length": stats.st_size, "Last-Modified": modified, }) resp.raw = LocalFSResponse(open(pathname, "rb")) resp.close = resp.raw.close return resp
def send(self, request, stream=None, timeout=None, verify=None, cert=None, proxies=None): pathname = url_to_path(request.url) resp = Response() resp.status_code = 200 resp.url = request.url try: stats = os.stat(pathname) except OSError as exc: resp.status_code = 404 resp.raw = exc else: modified = email.utils.formatdate(stats.st_mtime, usegmt=True) content_type = mimetypes.guess_type(pathname)[0] or "text/plain" resp.headers = CaseInsensitiveDict({ "Content-Type": content_type, "Content-Length": stats.st_size, "Last-Modified": modified, }) resp.raw = open(pathname, "rb") resp.close = resp.raw.close return resp
def dumps(self, request, response, body=None): response_headers = CaseInsensitiveDict(response.headers) if body is None: body = response.read(decode_content=False) response._fp = io.BytesIO(body) data = { "response": { "body": body, "headers": response.headers, "status": response.status, "version": response.version, "reason": response.reason, "strict": response.strict, "decode_content": response.decode_content, }, } # Construct our vary headers data["vary"] = {} if "vary" in response_headers: varied_headers = response_headers['vary'].split(',') for header in varied_headers: header = header.strip() data["vary"][header] = request.headers.get(header, None) return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)
def dumps(self, request, response, body=None): response_headers = CaseInsensitiveDict(response.headers) if body is None: body = response.read(decode_content=False) # NOTE: 99% sure this is dead code. I'm only leaving it # here b/c I don't have a base yet to prove # it. Basically, before using # `cachecontrol.filewrapper.CallbackFileWrapper`, # this made an effort to reset the file handle. The # `CallbackFileWrapper` short circuits this code by # setting the body as the content is consumed, the # result being a `body` argument is *always* passed # into cache_response, and in turn, # `Serializer.dump`. response._fp = io.BytesIO(body) # NOTE: This is all a bit weird, but it's really important that on # Python 2.x these objects are unicode and not str, even when # they contain only ascii. The problem here is that msgpack # understands the difference between unicode and bytes and we # have it set to differentiate between them, however Python 2 # doesn't know the difference. Forcing these to unicode will be # enough to have msgpack know the difference. data = { u"response": { u"body": body, u"headers": dict((text_type(k), text_type(v)) for k, v in response.headers.items()), u"status": response.status, u"version": response.version, u"reason": text_type(response.reason), u"strict": response.strict, u"decode_content": response.decode_content, } } # Construct our vary headers data[u"vary"] = {} if u"vary" in response_headers: varied_headers = response_headers[u"vary"].split(",") for header in varied_headers: header = text_type(header).strip() header_value = request.headers.get(header, None) if header_value is not None: header_value = text_type(header_value) data[u"vary"][header] = header_value return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
def __init__(self, raw, url, length, content_type): super(S3Response, self).__init__() self.status_code = 200 self.raw = S3RawResponse(raw) self.headers = CaseInsensitiveDict({ "Content-Type": content_type, "Content-Length": length, }) self.url = url
def default_headers(): return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), 'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')), 'Accept': '*/*' })
def cache_response(self, request, response, body=None): """ Algorithm for caching requests. This assumes a requests Response object. """ # From httplib2: Don't cache 206's since we aren't going to # handle byte range requests if response.status not in [200, 203, 300, 301]: return response_headers = CaseInsensitiveDict(response.headers) cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(response_headers) cache_url = self.cache_url(request.url) # Delete it from the cache if we happen to have it stored there no_store = cc.get('no-store') or cc_req.get('no-store') if no_store and self.cache.get(cache_url): self.cache.delete(cache_url) # If we've been given an etag, then keep the response if self.cache_etags and 'etag' in response_headers: self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), ) # Add to the cache any 301s. We do this before looking that # the Date headers. elif response.status == 301: self.cache.set(cache_url, self.serializer.dumps(request, response)) # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. elif 'date' in response_headers: # cache when there is a max-age > 0 if cc and cc.get('max-age'): if int(cc['max-age']) > 0: self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), ) # If the request can expire, it means we should cache it # in the meantime. elif 'expires' in response_headers: if response_headers['expires']: self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), )
def prepare_response(self, request, cached, body_file=None): """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ # Special case the '*' Vary value as it means we cannot actually # determine if the cached response is suitable for this request. # This case is also handled in the controller code when creating # a cache entry, but is left here for backwards compatibility. if "*" in cached.get("vary", {}): return # Ensure that the Vary headers for the cached response match our # request for header, value in cached.get("vary", {}).items(): if request.headers.get(header, None) != value: return body_raw = cached["response"].pop("body") headers = CaseInsensitiveDict(data=cached["response"]["headers"]) if headers.get("transfer-encoding", "") == "chunked": headers.pop("transfer-encoding") cached["response"]["headers"] = headers try: if body_file is None: body = io.BytesIO(body_raw) else: body = body_file except TypeError: # This can happen if cachecontrol serialized to v1 format (pickle) # using Python 2. A Python 2 str(byte string) will be unpickled as # a Python 3 str (unicode string), which will cause the above to # fail with: # # TypeError: 'str' does not support the buffer interface body = io.BytesIO(body_raw.encode("utf8")) return HTTPResponse(body=body, preload_content=False, **cached["response"])
def prepare_response(self, request, cached): """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ # Special case the '*' Vary value as it means we cannot actually # determine if the cached response is suitable for this request. if "*" in cached.get("vary", {}): return # Ensure that the Vary headers for the cached response match our # request for header, value in cached.get("vary", {}).items(): if request.headers.get(header, None) != value: return body_raw = cached["response"].pop("body") headers = CaseInsensitiveDict(data=cached['response']['headers']) if headers.get('transfer-encoding', '') == 'chunked': headers.pop('transfer-encoding') cached['response']['headers'] = headers try: body = io.BytesIO(body_raw) except TypeError: # This can happen if cachecontrol serialized to v1 format (pickle) # using Python 2. A Python 2 str(byte string) will be unpickled as # a Python 3 str (unicode string), which will cause the above to # fail with: # # TypeError: 'str' does not support the buffer interface body = io.BytesIO(body_raw.encode('utf8')) return HTTPResponse( body=body, preload_content=False, **cached["response"] )
def __init__(self): super(Response, self).__init__() self._content = False self._content_consumed = False #: Integer Code of responded HTTP Status. self.status_code = None #: Case-insensitive Dictionary of Response Headers. #: For example, ``headers['content-encoding']`` will return the #: value of a ``'Content-Encoding'`` response header. self.headers = CaseInsensitiveDict() #: File-like object representation of response (for advanced usage). #: Requires that ``stream=True` on the request. # This requirement does not apply for use internally to Requests. self.raw = None #: Final URL location of Response. self.url = None #: Encoding to decode with when accessing r.text. self.encoding = None #: A list of :class:`Response <Response>` objects from #: the history of the Request. Any redirect responses will end #: up here. The list is sorted from the oldest to the most recent request. self.history = [] self.reason = None #: A CookieJar of Cookies the server sent back. self.cookies = cookiejar_from_dict({}) #: The amount of time elapsed between sending the request #: and the arrival of the response (as a timedelta) self.elapsed = datetime.timedelta(0)
def dumps(self, request, response, body=None): response_headers = CaseInsensitiveDict(response.headers) if body is None: # When a body isn't passed in, we'll read the response. We # also update the response with a new file handler to be # sure it acts as though it was never read. body = response.read(decode_content=False) response._fp = io.BytesIO(body) # NOTE: This is all a bit weird, but it's really important that on # Python 2.x these objects are unicode and not str, even when # they contain only ascii. The problem here is that msgpack # understands the difference between unicode and bytes and we # have it set to differentiate between them, however Python 2 # doesn't know the difference. Forcing these to unicode will be # enough to have msgpack know the difference. data = { u"response": { u"body": body, u"headers": dict((text_type(k), text_type(v)) for k, v in response.headers.items()), u"status": response.status, u"version": response.version, u"reason": text_type(response.reason), u"strict": response.strict, u"decode_content": response.decode_content, } } # Construct our vary headers data[u"vary"] = {} if u"vary" in response_headers: varied_headers = response_headers[u"vary"].split(",") for header in varied_headers: header = text_type(header).strip() header_value = request.headers.get(header, None) if header_value is not None: header_value = text_type(header_value) data[u"vary"][header] = header_value return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
def conditional_headers(self, request): cache_url = self.cache_url(request.url) resp = self.serializer.loads(request, self.cache.get(cache_url)) new_headers = {} if resp: headers = CaseInsensitiveDict(resp.headers) if "etag" in headers: new_headers["If-None-Match"] = headers["ETag"] if "last-modified" in headers: new_headers["If-Modified-Since"] = headers["Last-Modified"] return new_headers
def conditional_headers(self, request): cache_url = self.cache_url(request.url) resp = self.serializer.loads(request, self.cache.get(cache_url)) new_headers = {} if resp: headers = CaseInsensitiveDict(resp.headers) if 'etag' in headers: new_headers['If-None-Match'] = headers['ETag'] if 'last-modified' in headers: new_headers['If-Modified-Since'] = headers['Last-Modified'] return new_headers
def dumps(self, request, response, body=None): response_headers = CaseInsensitiveDict(response.headers) if body is None: body = response.read(decode_content=False) # NOTE: 99% sure this is dead code. I'm only leaving it # here b/c I don't have a test yet to prove # it. Basically, before using # `cachecontrol.filewrapper.CallbackFileWrapper`, # this made an effort to reset the file handle. The # `CallbackFileWrapper` short circuits this code by # setting the body as the content is consumed, the # result being a `body` argument is *always* passed # into cache_response, and in turn, # `Serializer.dump`. response._fp = io.BytesIO(body) data = { "response": { "body": body, "headers": dict(response.headers), "status": response.status, "version": response.version, "reason": response.reason, "strict": response.strict, "decode_content": response.decode_content, }, } # Construct our vary headers data["vary"] = {} if "vary" in response_headers: varied_headers = response_headers['vary'].split(',') for header in varied_headers: header = header.strip() data["vary"][header] = request.headers.get(header, None) return b",".join([b"cc=3", msgpack.dumps(data, use_bin_type=True)])
def send( self, request: PreparedRequest, stream: bool = False, timeout: Optional[Union[float, Tuple[float, float]]] = None, verify: Union[bool, str] = True, cert: Optional[Union[str, Tuple[str, str]]] = None, proxies: Optional[Mapping[str, str]] = None, ) -> Response: pathname = url_to_path(request.url) resp = Response() resp.status_code = 200 resp.url = request.url try: stats = os.stat(pathname) except OSError as exc: # format the exception raised as a io.BytesIO object, # to return a better error message: resp.status_code = 404 resp.reason = type(exc).__name__ resp.raw = io.BytesIO(f"{resp.reason}: {exc}".encode("utf8")) else: modified = email.utils.formatdate(stats.st_mtime, usegmt=True) content_type = mimetypes.guess_type(pathname)[0] or "text/plain" resp.headers = CaseInsensitiveDict( { "Content-Type": content_type, "Content-Length": stats.st_size, "Last-Modified": modified, } ) resp.raw = open(pathname, "rb") resp.close = resp.raw.close return resp
def cached_request(self, request): """ Return a cached response if it exists in the cache, otherwise return False. """ cache_url = self.cache_url(request.url) cc = self.parse_cache_control(request.headers) # non-caching states no_cache = True if 'no-cache' in cc else False if 'max-age' in cc and cc['max-age'] == 0: no_cache = True # Bail out if no-cache was set if no_cache: return False # It is in the cache, so lets see if it is going to be # fresh enough resp = self.serializer.loads(request, self.cache.get(cache_url)) # Check to see if we have a cached object if not resp: return False headers = CaseInsensitiveDict(resp.headers) now = time.time() date = calendar.timegm(parsedate_tz(headers['date'])) current_age = max(0, now - date) # TODO: There is an assumption that the result will be a # urllib3 response object. This may not be best since we # could probably avoid instantiating or constructing the # response until we know we need it. resp_cc = self.parse_cache_control(headers) # determine freshness freshness_lifetime = 0 # Check the max-age pragma in the cache control header if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): freshness_lifetime = int(resp_cc['max-age']) # If there isn't a max-age, check for an expires header elif 'expires' in headers: expires = parsedate_tz(headers['expires']) if expires is not None: expire_time = calendar.timegm(expires) - date freshness_lifetime = max(0, expire_time) # determine if we are setting freshness limit in the req if 'max-age' in cc: try: freshness_lifetime = int(cc['max-age']) except ValueError: freshness_lifetime = 0 if 'min-fresh' in cc: try: min_fresh = int(cc['min-fresh']) except ValueError: min_fresh = 0 # adjust our current age by our min fresh current_age += min_fresh # see how fresh we actually are fresh = (freshness_lifetime > current_age) if fresh: return resp # we're not fresh. If we don't have an Etag, clear it out if 'etag' not in headers: self.cache.delete(cache_url) # return the original handler return False
def cache_response(self, request, response, body=None, status_codes=None): """ Algorithm for caching requests. This assumes a requests Response object. """ # From httplib2: Don't cache 206's since we aren't going to # handle byte range requests cacheable_status_codes = status_codes or self.cacheable_status_codes if response.status not in cacheable_status_codes: logger.debug('Status code %s not in %s', response.status, cacheable_status_codes) return response_headers = CaseInsensitiveDict(response.headers) # If we've been given a body, our response has a Content-Length, that # Content-Length is valid then we can check to see if the body we've # been given matches the expected size, and if it doesn't we'll just # skip trying to cache it. if (body is not None and "content-length" in response_headers and response_headers["content-length"].isdigit() and int(response_headers["content-length"]) != len(body)): return cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(response_headers) cache_url = self.cache_url(request.url) logger.debug('Updating cache with response from "%s"', cache_url) # Delete it from the cache if we happen to have it stored there no_store = False if 'no-store' in cc: no_store = True logger.debug('Response header has "no-store"') if 'no-store' in cc_req: no_store = True logger.debug('Request header has "no-store"') if no_store and self.cache.get(cache_url): logger.debug('Purging existing cache entry to honor "no-store"') self.cache.delete(cache_url) # If we've been given an etag, then keep the response if self.cache_etags and 'etag' in response_headers: logger.debug('Caching due to etag') self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), ) # Add to the cache any 301s. We do this before looking that # the Date headers. elif response.status == 301: logger.debug('Caching permanant redirect') self.cache.set(cache_url, self.serializer.dumps(request, response)) # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. elif 'date' in response_headers: # cache when there is a max-age > 0 if 'max-age' in cc and cc['max-age'] > 0: logger.debug('Caching b/c date exists and max-age > 0') self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), ) # If the request can expire, it means we should cache it # in the meantime. elif 'expires' in response_headers: if response_headers['expires']: logger.debug('Caching b/c of expires header') self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), )
def cached_request(self, request): """ Return a cached response if it exists in the cache, otherwise return False. """ cache_url = self.cache_url(request.url) logger.debug('Looking up "%s" in the cache', cache_url) cc = self.parse_cache_control(request.headers) # Bail out if the request insists on fresh data if 'no-cache' in cc: logger.debug('Request header has "no-cache", cache bypassed') return False if 'max-age' in cc and cc['max-age'] == 0: logger.debug('Request header has "max_age" as 0, cache bypassed') return False # Request allows serving from the cache, let's see if we find something cache_data = self.cache.get(cache_url) if cache_data is None: logger.debug('No cache entry available') return False # Check whether it can be deserialized resp = self.serializer.loads(request, cache_data) if not resp: logger.warning('Cache entry deserialization failed, entry ignored') return False # If we have a cached 301, return it immediately. We don't # need to test our response for other headers b/c it is # intrinsically "cacheable" as it is Permanent. # See: # https://tools.ietf.org/html/rfc7231#section-6.4.2 # # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). if resp.status == 301: msg = ('Returning cached "301 Moved Permanently" response ' '(ignoring date and etag information)') logger.debug(msg) return resp headers = CaseInsensitiveDict(resp.headers) if not headers or 'date' not in headers: if 'etag' not in headers: # Without date or etag, the cached response can never be used # and should be deleted. logger.debug('Purging cached response: no date or etag') self.cache.delete(cache_url) logger.debug('Ignoring cached response: no date') return False now = time.time() date = calendar.timegm(parsedate_tz(headers['date'])) current_age = max(0, now - date) logger.debug('Current age based on date: %i', current_age) # TODO: There is an assumption that the result will be a # urllib3 response object. This may not be best since we # could probably avoid instantiating or constructing the # response until we know we need it. resp_cc = self.parse_cache_control(headers) # determine freshness freshness_lifetime = 0 # Check the max-age pragma in the cache control header if 'max-age' in resp_cc: freshness_lifetime = resp_cc['max-age'] logger.debug('Freshness lifetime from max-age: %i', freshness_lifetime) # If there isn't a max-age, check for an expires header elif 'expires' in headers: expires = parsedate_tz(headers['expires']) if expires is not None: expire_time = calendar.timegm(expires) - date freshness_lifetime = max(0, expire_time) logger.debug("Freshness lifetime from expires: %i", freshness_lifetime) # Determine if we are setting freshness limit in the # request. Note, this overrides what was in the response. if 'max-age' in cc: freshness_lifetime = cc['max-age'] logger.debug('Freshness lifetime from request max-age: %i', freshness_lifetime) if 'min-fresh' in cc: min_fresh = cc['min-fresh'] # adjust our current age by our min fresh current_age += min_fresh logger.debug('Adjusted current age from min-fresh: %i', current_age) # Return entry if it is fresh enough if freshness_lifetime > current_age: logger.debug('The response is "fresh", returning cached response') logger.debug('%i > %i', freshness_lifetime, current_age) return resp # we're not fresh. If we don't have an Etag, clear it out if 'etag' not in headers: logger.debug( 'The cached response is "stale" with no etag, purging') self.cache.delete(cache_url) # return the original handler return False
def cached_request(self, request): """ Return a cached response if it exists in the cache, otherwise return False. """ cache_url = self.cache_url(request.url) cc = self.parse_cache_control(request.headers) # non-caching states no_cache = True if 'no-cache' in cc else False if 'max-age' in cc and cc['max-age'] == 0: no_cache = True # Bail out if no-cache was set if no_cache: return False # It is in the cache, so lets see if it is going to be # fresh enough resp = self.serializer.loads(request, self.cache.get(cache_url)) # Check to see if we have a cached object if not resp: return False # If we have a cached 301, return it immediately. We don't # need to test our response for other headers b/c it is # intrinsically "cacheable" as it is Permanent. # See: # https://tools.ietf.org/html/rfc7231#section-6.4.2 # # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). if resp.status == 301: return resp headers = CaseInsensitiveDict(resp.headers) if not headers or 'date' not in headers: # With date or etag, the cached response can never be used # and should be deleted. if 'etag' not in headers: self.cache.delete(cache_url) return False now = time.time() date = calendar.timegm(parsedate_tz(headers['date'])) current_age = max(0, now - date) # TODO: There is an assumption that the result will be a # urllib3 response object. This may not be best since we # could probably avoid instantiating or constructing the # response until we know we need it. resp_cc = self.parse_cache_control(headers) # determine freshness freshness_lifetime = 0 # Check the max-age pragma in the cache control header if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): freshness_lifetime = int(resp_cc['max-age']) # If there isn't a max-age, check for an expires header elif 'expires' in headers: expires = parsedate_tz(headers['expires']) if expires is not None: expire_time = calendar.timegm(expires) - date freshness_lifetime = max(0, expire_time) # determine if we are setting freshness limit in the req if 'max-age' in cc: try: freshness_lifetime = int(cc['max-age']) except ValueError: freshness_lifetime = 0 if 'min-fresh' in cc: try: min_fresh = int(cc['min-fresh']) except ValueError: min_fresh = 0 # adjust our current age by our min fresh current_age += min_fresh # see how fresh we actually are fresh = (freshness_lifetime > current_age) if fresh: return resp # we're not fresh. If we don't have an Etag, clear it out if 'etag' not in headers: self.cache.delete(cache_url) # return the original handler return False
class Response(object): """The :class:`Response <Response>` object, which contains a server's response to an HTTP request. """ __attrs__ = [ '_content', 'status_code', 'headers', 'url', 'history', 'encoding', 'reason', 'cookies', 'elapsed', 'request', ] def __init__(self): super(Response, self).__init__() self._content = False self._content_consumed = False #: Integer Code of responded HTTP Status. self.status_code = None #: Case-insensitive Dictionary of Response Headers. #: For example, ``headers['content-encoding']`` will return the #: value of a ``'Content-Encoding'`` response header. self.headers = CaseInsensitiveDict() #: File-like object representation of response (for advanced usage). #: Requires that ``stream=True` on the request. # This requirement does not apply for use internally to Requests. self.raw = None #: Final URL location of Response. self.url = None #: Encoding to decode with when accessing r.text. self.encoding = None #: A list of :class:`Response <Response>` objects from #: the history of the Request. Any redirect responses will end #: up here. The list is sorted from the oldest to the most recent request. self.history = [] self.reason = None #: A CookieJar of Cookies the server sent back. self.cookies = cookiejar_from_dict({}) #: The amount of time elapsed between sending the request #: and the arrival of the response (as a timedelta) self.elapsed = datetime.timedelta(0) def __getstate__(self): # Consume everything; accessing the content attribute makes # sure the content has been fully read. if not self._content_consumed: self.content return dict( (attr, getattr(self, attr, None)) for attr in self.__attrs__ ) def __setstate__(self, state): for name, value in state.items(): setattr(self, name, value) # pickled objects do not have .raw setattr(self, '_content_consumed', True) def __repr__(self): return '<Response [%s]>' % (self.status_code) def __bool__(self): """Returns true if :attr:`status_code` is 'OK'.""" return self.ok def __nonzero__(self): """Returns true if :attr:`status_code` is 'OK'.""" return self.ok def __iter__(self): """Allows you to use a response as an iterator.""" return self.iter_content(128) @property def ok(self): try: self.raise_for_status() except RequestException: return False return True @property def apparent_encoding(self): """The apparent encoding, provided by the lovely Charade library (Thanks, Ian!).""" return chardet.detect(self.content)['encoding'] def iter_content(self, chunk_size=1, decode_unicode=False): """Iterates over the response data. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. The chunk size is the number of bytes it should read into memory. This is not necessarily the length of each item returned as decoding can take place. """ if self._content_consumed: # simulate reading small chunks of the content return iter_slices(self._content, chunk_size) def generate(): try: # Special case for urllib3. try: for chunk in self.raw.stream(chunk_size, decode_content=True): yield chunk except IncompleteRead as e: raise ChunkedEncodingError(e) except AttributeError: # Standard file-like object. while True: chunk = self.raw.read(chunk_size) if not chunk: break yield chunk self._content_consumed = True gen = generate() if decode_unicode: gen = stream_decode_response_unicode(gen, self) return gen def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. """ pending = None for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode): if pending is not None: chunk = pending + chunk lines = chunk.splitlines() if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]: pending = lines.pop() else: pending = None for line in lines: yield line if pending is not None: yield pending @property def content(self): """Content of the response, in bytes.""" if self._content is False: # Read the contents. try: if self._content_consumed: raise RuntimeError( 'The content for this response was already consumed') if self.status_code == 0: self._content = None else: self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes() except AttributeError: self._content = None self._content_consumed = True # don't need to release the connection; that's been handled by urllib3 # since we exhausted the data. return self._content @property def text(self): """Content of the response, in unicode. If Response.encoding is None, encoding will be guessed using ``charade``. """ # Try charset from content-type content = None encoding = self.encoding if not self.content: return str('') # Fallback to auto-detected encoding. if self.encoding is None: encoding = self.apparent_encoding # Decode unicode from given encoding. try: content = str(self.content, encoding, errors='replace') except (LookupError, TypeError): # A LookupError is raised if the encoding was not found which could # indicate a misspelling or similar mistake. # # A TypeError can be raised if encoding is None # # So we try blindly encoding. content = str(self.content, errors='replace') return content def json(self, **kwargs): """Returns the json-encoded content of a response, if any. :param \*\*kwargs: Optional arguments that ``json.loads`` takes. """ if not self.encoding and len(self.content) > 3: # No encoding set. JSON RFC 4627 section 3 states we should expect # UTF-8, -16 or -32. Detect which one to use; If the detection or # decoding fails, fall back to `self.text` (using chardet to make # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: return json.loads(self.content.decode(encoding), **kwargs) return json.loads(self.text, **kwargs) @property def links(self): """Returns the parsed header links of the response, if any.""" header = self.headers.get('link') # l = MultiDict() l = {} if header: links = parse_header_links(header) for link in links: key = link.get('rel') or link.get('url') l[key] = link return l def raise_for_status(self): """Raises stored :class:`HTTPError`, if one occurred.""" http_error_msg = '' if 400 <= self.status_code < 500: http_error_msg = '%s Client Error: %s' % (self.status_code, self.reason) elif 500 <= self.status_code < 600: http_error_msg = '%s Server Error: %s' % (self.status_code, self.reason) if http_error_msg: raise HTTPError(http_error_msg, response=self) def close(self): """Closes the underlying file descriptor and releases the connection back to the pool. *Note: Should not normally need to be called explicitly.* """ return self.raw.release_conn()
# This case is also handled in the controller code when creating # a cache entry, but is left here for backwards compatibility. ======= >>>>>>> 71358189c5e72ee2ac9883b408a2f540a7f5745e if "*" in cached.get("vary", {}): return # Ensure that the Vary headers for the cached response match our # request for header, value in cached.get("vary", {}).items(): if request.headers.get(header, None) != value: return body_raw = cached["response"].pop("body") headers = CaseInsensitiveDict(data=cached["response"]["headers"]) if headers.get("transfer-encoding", "") == "chunked": headers.pop("transfer-encoding") cached["response"]["headers"] = headers try: body = io.BytesIO(body_raw) except TypeError: # This can happen if cachecontrol serialized to v1 format (pickle) # using Python 2. A Python 2 str(byte string) will be unpickled as # a Python 3 str (unicode string), which will cause the above to # fail with: # # TypeError: 'str' does not support the buffer interface body = io.BytesIO(body_raw.encode("utf8"))
def cache_response(self, request, response, body=None, status_codes=None): """ Algorithm for caching requests. This assumes a requests Response object. """ # From httplib2: Don't cache 206's since we aren't going to # handle byte range requests cacheable_status_codes = status_codes or self.cacheable_status_codes if response.status not in cacheable_status_codes: logger.debug("Status code %s not in %s", response.status, cacheable_status_codes) return response_headers = CaseInsensitiveDict(response.headers) # If we've been given a body, our response has a Content-Length, that # Content-Length is valid then we can check to see if the body we've # been given matches the expected size, and if it doesn't we'll just # skip trying to cache it. if (body is not None and "content-length" in response_headers and response_headers["content-length"].isdigit() and int(response_headers["content-length"]) != len(body)): return cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(response_headers) cache_url = self.cache_url(request.url) logger.debug('Updating cache with response from "%s"', cache_url) # Delete it from the cache if we happen to have it stored there no_store = False if "no-store" in cc: no_store = True logger.debug('Response header has "no-store"') if "no-store" in cc_req: no_store = True logger.debug('Request header has "no-store"') if no_store and self.cache.get(cache_url): logger.debug('Purging existing cache entry to honor "no-store"') self.cache.delete(cache_url) if no_store: return # https://tools.ietf.org/html/rfc7234#section-4.1: # A Vary header field-value of "*" always fails to match. # Storing such a response leads to a deserialization warning # during cache lookup and is not allowed to ever be served, # so storing it can be avoided. if "*" in response_headers.get("vary", ""): logger.debug('Response header has "Vary: *"') return # If we've been given an etag, then keep the response if self.cache_etags and "etag" in response_headers: logger.debug("Caching due to etag") self.cache.set(cache_url, self.serializer.dumps(request, response, body=body)) # Add to the cache any 301s. We do this before looking that # the Date headers. elif response.status == 301: logger.debug("Caching permanant redirect") self.cache.set(cache_url, self.serializer.dumps(request, response)) # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. elif "date" in response_headers: # cache when there is a max-age > 0 if "max-age" in cc and cc["max-age"] > 0: logger.debug("Caching b/c date exists and max-age > 0") self.cache.set( cache_url, self.serializer.dumps(request, response, body=body)) # If the request can expire, it means we should cache it # in the meantime. elif "expires" in response_headers: if response_headers["expires"]: logger.debug("Caching b/c of expires header") self.cache.set( cache_url, self.serializer.dumps(request, response, body=body))
if not resp: return False <<<<<<< HEAD # If we have a cached 301, return it immediately. We don't # need to test our response for other headers b/c it is # intrinsically "cacheable" as it is Permanent. # See: # https://tools.ietf.org/html/rfc7231#section-6.4.2 # # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). if resp.status == 301: return resp headers = CaseInsensitiveDict(resp.headers) if not headers or 'date' not in headers: # With date or etag, the cached response can never be used # and should be deleted. if 'etag' not in headers: self.cache.delete(cache_url) return False ======= headers = CaseInsensitiveDict(resp.headers) >>>>>>> bde4533e29dfedadf6bcf9d451baa615bc828a59 now = time.time() date = calendar.timegm( parsedate_tz(headers['date']) ) current_age = max(0, now - date)