def format_options(self, red):
    "Return things that the user can do with the URI as HTML links"
    options = []
    media_type = red.parsed_hdrs.get('content-type', [""])[0]
    options.append((
        u"response headers: %s bytes" % f_num(red.header_length),
        "how large the response headers are, including the status line"
    ))
    options.append((
        u"body: %s bytes" % f_num(red.res_body_len),
        "how large the response body is"
    ))
    transfer_overhead = red.transfer_length - red.res_body_len
    if transfer_overhead > 0:
        options.append((
            u"transfer overhead: %s bytes" % f_num(transfer_overhead),
            "how much using chunked encoding adds to the response size"
        ))
    options.append(None)
    options.append((
        u"<a href='#' id='body_view' accesskey='b'>view body</a>",
        "View this response body (with any gzip compression removed)"
    ))
    if self.kw.get('test_id', None):
        har_locator = "id=%s" % self.kw['test_id']
    else:
        har_locator = self.req_qs(red.uri)
    options.append((
        u"<a href='?%s&format=har' accesskey='h'>view har</a>" % har_locator,
        "View a HAR (HTTP ARchive, a JSON format) file for this response"
    ))
    if not self.kw.get('is_saved', False):
        if self.kw.get('allow_save', False):
            options.append((
                u"<a href='#' id='save' accesskey='s'>save</a>",
                "Save these results for future reference"
            ))
        if self.validators.has_key(media_type):
            options.append((
                u"<a href='%s' accesskey='v'>validate body</a>" %
                    self.validators[media_type] % e_query_arg(red.uri),
                ""
            ))
        if hasattr(red, "link_count") and red.link_count > 0:
            options.append((
                u"<a href='?descend=True&%s' accesskey='a'>check embedded</a>"
                    % self.req_qs(red.uri),
                "run RED on images, frames and embedded links"
            ))
    return nl.join([
        o and "<span class='option' title='%s'>%s</span>" % (o[1], o[0])
        or "<br>"
        for o in options
    ])
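# A minimal, hypothetical sketch (not part of RED itself) of the join idiom
# used in format_options above: each option is a (link_html, title) tuple, a
# None entry becomes a line break, and the whole list is rendered as spans.
# The names below (sample_options, the local nl) are illustrative only.
def _render_options_example():
    nl = u"\n"
    sample_options = [
        (u"body: 1,234 bytes", "how large the response body is"),
        None,
        (u"<a href='#' id='save'>save</a>", "Save these results"),
    ]
    return nl.join([
        o and "<span class='option' title='%s'>%s</span>" % (o[1], o[0])
        or "<br>"
        for o in sample_options
    ])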
def done(self):
    if not self.state.res_complete:
        self.set_message('', rs.RANGE_SUBREQ_PROBLEM,
            problem=self.state.res_error.desc
        )
        return
    if self.state.res_status == '206':
        ce = 'content-encoding'
        if ('gzip' in self.base.parsed_hdrs.get(ce, [])) == \
           ('gzip' not in self.state.parsed_hdrs.get(ce, [])):
            self.set_message(
                'header-accept-ranges header-content-encoding',
                rs.RANGE_NEG_MISMATCH
            )
            return
        if not [True for h in self.base.orig_req_hdrs
                if h[0].lower() == 'if-range']:
            self.check_missing_hdrs([
                    'date', 'cache-control', 'content-location', 'etag',
                    'expires', 'vary'
                ], rs.MISSING_HDRS_206, 'Range'
            )
        if self.state.parsed_hdrs.get('etag', 1) == \
           self.base.parsed_hdrs.get('etag', 2):
            if self.state.res_body == self.range_target:
                self.base.partial_support = True
                self.set_message('header-accept-ranges', rs.RANGE_CORRECT)
            else:
                # the body samples are just bags of bits
                self.base.partial_support = False
                self.set_message('header-accept-ranges',
                    rs.RANGE_INCORRECT,
                    range="bytes=%s-%s" % (
                        self.range_start, self.range_end
                    ),
                    range_expected=\
                        self.range_target.encode('string_escape'),
                    range_expected_bytes=f_num(len(self.range_target)),
                    range_received=\
                        self.state.res_body.encode('string_escape'),
                    range_received_bytes=f_num(self.state.res_body_len)
                )
        else:
            self.set_message('header-accept-ranges', rs.RANGE_CHANGED)

    # TODO: address 416 directly
    elif self.state.res_status == self.base.res_status:
        self.base.partial_support = False
        self.set_message('header-accept-ranges', rs.RANGE_FULL)
    else:
        self.set_message('header-accept-ranges', rs.RANGE_STATUS,
            range_status=self.state.res_status,
            enc_range_status=self.state.res_status or '(unknown)'
        )
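# A small standalone illustration (synthetic header values, not RED code) of
# the negotiation-mismatch comparison used above: the expression is true
# exactly when one of the two responses was gzip-encoded and the other was
# not, which is what RANGE_NEG_MISMATCH flags.
def _range_gzip_mismatch_example():
    base_ce = ['gzip']   # content-codings on the original response
    sub_ce = []          # content-codings on the Range subrequest response
    return ('gzip' in base_ce) == ('gzip' not in sub_ce)   # True: mismatch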
def __init__(self, uri, method="GET", req_hdrs=None, req_body=None,
             status_cb=None, body_procs=None):
    orig_req_hdrs = req_hdrs or []
    rh = orig_req_hdrs + [('Accept-Encoding', 'gzip')]
    RedFetcher.__init__(self, uri, method, rh, req_body,
                        status_cb, body_procs, req_type=method)

    # Extra metadata that the "main" RED will be adorned with
    self.state.orig_req_hdrs = orig_req_hdrs
    self.state.age = None
    self.state.store_shared = None
    self.state.store_private = None
    self.state.freshness_lifetime = None
    self.state.stale_serveable = None
    self.state.partial_support = None
    self.state.inm_support = None
    self.state.ims_support = None
    self.state.gzip_support = None
    self.state.gzip_savings = 0

    # check the URI
    if not re.match("^\s*%s\s*$" % absolute_URI, uri, re.VERBOSE):
        self.state.set_message('uri', rs.URI_BAD_SYNTAX)
    if len(uri) > max_uri:
        self.state.set_message('uri', rs.URI_TOO_LONG,
            uri_len=f_num(len(uri))
        )
def _response_done(self, trailers):
    "Finish analysing the response, handling any parse errors."
    self._st.append('_response_done()')
    state = self.state
    state.res_complete = True
    state.res_done_ts = thor.time()
    state.transfer_length = self.exchange.input_transfer_length
    state.header_length = self.exchange.input_header_length
    # TODO: check trailers
    if self.status_cb and state.type:
        self.status_cb("fetched %s (%s)" % (state.uri, state.type))
    state.res_body_md5 = self._md5_processor.digest()
    state.res_body_post_md5 = self._md5_post_processor.digest()
    checkCaching(state)

    if state.method not in ['HEAD'] and state.res_status not in ['304']:
        # check payload basics
        if state.parsed_hdrs.has_key('content-length'):
            if state.res_body_len == state.parsed_hdrs['content-length']:
                state.set_message('header-content-length', rs.CL_CORRECT)
            else:
                state.set_message('header-content-length',
                    rs.CL_INCORRECT,
                    body_length=f_num(state.res_body_len)
                )
        if state.parsed_hdrs.has_key('content-md5'):
            c_md5_calc = base64.encodestring(state.res_body_md5)[:-1]
            if state.parsed_hdrs['content-md5'] == c_md5_calc:
                state.set_message('header-content-md5', rs.CMD5_CORRECT)
            else:
                state.set_message('header-content-md5',
                    rs.CMD5_INCORRECT, calc_md5=c_md5_calc)
    self.done()
    self.finish_task()
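# A hedged illustration (Python 2, not RED code) of how the Content-MD5
# comparison value above is derived: the header carries the base64 of the raw
# MD5 digest of the body, and base64.encodestring() appends a trailing
# newline, hence the [:-1].  The body below is made up.
def _content_md5_example(body="hello, world"):
    import base64, hashlib
    digest = hashlib.md5(body).digest()          # raw 16-byte digest
    return base64.encodestring(digest)[:-1]      # value to compare with Content-MD5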
def done(self):
    if self.state.res_status == '206':
        # TODO: check entity headers
        # TODO: check content-range
        ce = 'content-encoding'
        if ('gzip' in self.base.parsed_hdrs.get(ce, [])) == \
           ('gzip' not in self.state.parsed_hdrs.get(ce, [])):
            self.set_message(
                'header-accept-ranges header-content-encoding',
                rs.RANGE_NEG_MISMATCH
            )
            return
        if self.state.parsed_hdrs.get('etag', 1) == \
           self.base.parsed_hdrs.get('etag', 2):
            if self.state.res_body == self.range_target:
                self.base.partial_support = True
                self.set_message('header-accept-ranges', rs.RANGE_CORRECT)
            else:
                # the body samples are just bags of bits
                self.base.partial_support = False
                self.set_message('header-accept-ranges',
                    rs.RANGE_INCORRECT,
                    range="bytes=%s-%s" % (
                        self.range_start, self.range_end
                    ),
                    range_expected=e(
                        self.range_target.encode('string_escape')
                    ),
                    range_expected_bytes=f_num(len(self.range_target)),
                    range_received=e(
                        self.state.res_body.encode('string_escape')
                    ),
                    range_received_bytes=f_num(self.state.res_body_len)
                )
        else:
            self.set_message('header-accept-ranges', rs.RANGE_CHANGED)

    # TODO: address 416 directly
    elif self.state.res_status == self.base.res_status:
        self.base.partial_support = False
        self.set_message('header-accept-ranges', rs.RANGE_FULL)
    else:
        self.set_message('header-accept-ranges', rs.RANGE_STATUS,
            range_status=self.state.res_status,
            enc_range_status=e(self.state.res_status or '(unknown)')
        )
def _response_body(self, chunk):
    "Process a chunk of the response body."
    state = self.state
    state.res_body_sample.append((state.res_body_len, chunk))
    if len(state.res_body_sample) > 4:
        state.res_body_sample.pop(0)
    self._md5_processor.update(chunk)
    state.res_body_len += len(chunk)
    if state.res_status == "206":
        # Store only partial responses completely, for error reporting
        state.res_body += chunk
        state.res_body_decode_len += len(chunk)
        # Don't actually try to make sense of a partial body...
        return
    content_codings = state.parsed_hdrs.get('content-encoding', [])
    content_codings.reverse()
    for coding in content_codings:
        # TODO: deflate support
        if coding == 'gzip' and self._gzip_ok:
            if not self._in_gzip_body:
                self._gzip_header_buffer += chunk
                try:
                    chunk = self._read_gzip_header(
                        self._gzip_header_buffer
                    )
                    self._in_gzip_body = True
                except IndexError:
                    return # not a full header yet
                except IOError, gzip_error:
                    state.set_message('header-content-encoding',
                        rs.BAD_GZIP,
                        gzip_error=str(gzip_error)
                    )
                    self._gzip_ok = False
                    return
            try:
                chunk = self._gzip_processor.decompress(chunk)
            except zlib.error, zlib_error:
                state.set_message(
                    'header-content-encoding',
                    rs.BAD_ZLIB,
                    zlib_error=str(zlib_error),
                    ok_zlib_len=f_num(state.res_body_sample[-1][0]),
                    chunk_sample=chunk[:20].encode('string_escape')
                )
                self._gzip_ok = False
                return
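# A minimal sketch (Python 2, not RED code) of incremental decompression in
# the style used above, assuming _gzip_processor is a raw-DEFLATE zlib
# decompressor and the gzip header has already been stripped (as
# _read_gzip_header does).  The data and chunk size below are synthetic.
def _streaming_inflate_example():
    import zlib
    deflater = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
    compressed = deflater.compress("spam " * 100) + deflater.flush()
    inflater = zlib.decompressobj(-zlib.MAX_WBITS)
    out = ""
    for i in range(0, len(compressed), 16):      # feed the stream in small chunks
        out += inflater.decompress(compressed[i:i + 16])
    out += inflater.flush()
    return out == "spam " * 100                  # True: round-trips cleanly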
def __init__(self, uri, method="GET", req_hdrs=None, req_body=None,
             status_cb=None, body_procs=None):
    orig_req_hdrs = req_hdrs or []
    rh = orig_req_hdrs + [(u'Accept-Encoding', u'gzip')]
    RedFetcher.__init__(self, uri, method, rh, req_body,
                        status_cb, body_procs, req_type=method)

    # Extra metadata that the "main" RED will be adorned with
    self.state.orig_req_hdrs = orig_req_hdrs
    self.state.age = None
    self.state.store_shared = None
    self.state.store_private = None
    self.state.freshness_lifetime = None
    self.state.stale_serveable = None
    self.state.partial_support = None
    self.state.inm_support = None
    self.state.ims_support = None
    self.state.gzip_support = None
    self.state.gzip_savings = 0

    # check the URI
    if not re.match("^\s*%s\s*$" % URI, uri, re.VERBOSE):
        self.state.set_message('uri', rs.URI_BAD_SYNTAX)
    if '#' in uri:
        # chop off the fragment
        uri = uri[:uri.index('#')]
    if len(uri) > max_uri:
        self.state.set_message('uri', rs.URI_TOO_LONG,
            uri_len=f_num(len(uri))
        )
def done(self):
    if not self.state.res_complete:
        self.set_message('', rs.CONNEG_SUBREQ_PROBLEM,
            problem=self.state.res_error.desc
        )
        return
    # see if it was compressed when not negotiated
    no_conneg_vary_headers = self.state.parsed_hdrs.get('vary', [])
    if 'gzip' in self.state.parsed_hdrs.get('content-encoding', []) or \
       'x-gzip' in self.state.parsed_hdrs.get('content-encoding', []):
        self.set_message('header-vary header-content-encoding',
            rs.CONNEG_GZIP_WITHOUT_ASKING)
    else: # Apparently, content negotiation is happening.
        # check status
        if self.base.res_status != self.state.res_status:
            self.set_message('status', rs.VARY_STATUS_MISMATCH,
                neg_status=self.base.res_status,
                noneg_status=self.state.res_status)
            return # Can't be sure what's going on...
        # check headers that should be invariant
        for hdr in ['content-type']:
            if self.base.parsed_hdrs.get(hdr) != \
               self.state.parsed_hdrs.get(hdr, None):
                self.set_message('header-%s' % hdr,
                    rs.VARY_HEADER_MISMATCH, header=hdr)
                # TODO: expose on-the-wire values.
        # check Vary headers
        vary_headers = self.base.parsed_hdrs.get('vary', [])
        if (not "accept-encoding" in vary_headers) and \
           (not "*" in vary_headers):
            self.set_message('header-vary', rs.CONNEG_NO_VARY)
        if no_conneg_vary_headers != vary_headers:
            self.set_message('header-vary', rs.VARY_INCONSISTENT,
                conneg_vary=", ".join(vary_headers),
                no_conneg_vary=", ".join(no_conneg_vary_headers)
            )
        # check body
        if self.base.res_body_post_md5 != self.state.res_body_md5:
            self.set_message('body', rs.VARY_BODY_MISMATCH)
            return # Can't be sure what's going on...
        # check ETag
        if self.state.parsed_hdrs.get('etag', 1) \
           == self.base.parsed_hdrs.get('etag', 2):
            self.set_message('header-etag', rs.VARY_ETAG_DOESNT_CHANGE)
            # TODO: weakness?
        # check compression efficiency
        if self.state.res_body_len > 0:
            savings = int(100 * (
                (float(self.state.res_body_len) - self.base.res_body_len)
                / self.state.res_body_len
            ))
        else:
            savings = 0
        self.base.gzip_support = True
        self.base.gzip_savings = savings
        if savings >= 0:
            self.set_message('header-content-encoding',
                rs.CONNEG_GZIP_GOOD,
                savings=savings,
                orig_size=f_num(self.state.res_body_len),
                gzip_size=f_num(self.base.res_body_len)
            )
        else:
            self.set_message('header-content-encoding',
                rs.CONNEG_GZIP_BAD,
                savings=abs(savings),
                orig_size=f_num(self.state.res_body_len),
                gzip_size=f_num(self.base.res_body_len)
            )
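# A worked example (made-up sizes, not RED code) of the savings calculation
# above: state.res_body_len is the identity (uncompressed) subrequest body
# and base.res_body_len is the gzip-negotiated body.
def _gzip_savings_example(uncompressed_len=10000, compressed_len=3500):
    savings = int(100 * (
        (float(uncompressed_len) - compressed_len) / uncompressed_len
    ))
    return savings   # 65: the gzipped response is 65% smaller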
def checkCaching(state):
    "Examine HTTP caching characteristics."
    # TODO: check URI for query string, message about HTTP/1.0 if so

    # get header values
    lm = state.parsed_hdrs.get('last-modified', None)
    date = state.parsed_hdrs.get('date', None)
    cc_set = state.parsed_hdrs.get('cache-control', [])
    cc_list = [k for (k, v) in cc_set]
    cc_dict = dict(cc_set)
    cc_keys = cc_dict.keys()

    # Last-Modified
    if lm:
        serv_date = date or state.res_ts
        if lm > (date or serv_date):
            state.set_message('header-last-modified', rs.LM_FUTURE)
        else:
            state.set_message('header-last-modified', rs.LM_PRESENT,
                last_modified_string=rh.relative_time(lm, serv_date))

    # known Cache-Control directives that don't allow duplicates
    known_cc = ["max-age", "no-store", "s-maxage", "public",
                "private", "pre-check", "post-check",
                "stale-while-revalidate", "stale-if-error",
    ]

    # check for mis-capitalised directives /
    # assure there aren't any dup directives with different values
    for cc in cc_keys:
        if cc.lower() in known_cc and cc != cc.lower():
            state.set_message('header-cache-control', rs.CC_MISCAP,
                cc_lower=cc.lower(), cc=cc
            )
        if cc in known_cc and cc_list.count(cc) > 1:
            state.set_message('header-cache-control', rs.CC_DUP, cc=cc)

    # Who can store this?
    if state.method not in cacheable_methods:
        state.store_shared = state.store_private = False
        state.set_message('method', rs.METHOD_UNCACHEABLE,
            method=state.method
        )
        return # bail; nothing else to see here
    elif 'no-store' in cc_keys:
        state.store_shared = state.store_private = False
        state.set_message('header-cache-control', rs.NO_STORE)
        return # bail; nothing else to see here
    elif 'private' in cc_keys:
        state.store_shared = False
        state.store_private = True
        state.set_message('header-cache-control', rs.PRIVATE_CC)
    elif 'authorization' in [k.lower() for k, v in state.req_hdrs] and \
         not 'public' in cc_keys:
        state.store_shared = False
        state.store_private = True
        state.set_message('header-cache-control', rs.PRIVATE_AUTH)
    else:
        state.store_shared = state.store_private = True
        state.set_message('header-cache-control', rs.STOREABLE)

    # no-cache?
    if 'no-cache' in cc_keys:
        if "last-modified" not in state.parsed_hdrs.keys() and \
           "etag" not in state.parsed_hdrs.keys():
            state.set_message('header-cache-control',
                rs.NO_CACHE_NO_VALIDATOR
            )
        else:
            state.set_message('header-cache-control', rs.NO_CACHE)
        return

    # pre-check / post-check
    if 'pre-check' in cc_keys or 'post-check' in cc_keys:
        if 'pre-check' not in cc_keys or 'post-check' not in cc_keys:
            state.set_message('header-cache-control', rs.CHECK_SINGLE)
        else:
            pre_check = post_check = None
            try:
                pre_check = int(cc_dict['pre-check'])
                post_check = int(cc_dict['post-check'])
            except ValueError:
                state.set_message('header-cache-control',
                    rs.CHECK_NOT_INTEGER
                )
            if pre_check is not None and post_check is not None:
                if pre_check == 0 and post_check == 0:
                    state.set_message('header-cache-control',
                        rs.CHECK_ALL_ZERO
                    )
                elif post_check > pre_check:
                    state.set_message('header-cache-control',
                        rs.CHECK_POST_BIGGER
                    )
                    post_check = pre_check
                elif post_check == 0:
                    state.set_message('header-cache-control',
                        rs.CHECK_POST_ZERO
                    )
                else:
                    state.set_message('header-cache-control',
                        rs.CHECK_POST_PRE,
                        pre_check=pre_check, post_check=post_check
                    )

    # vary?
    vary = state.parsed_hdrs.get('vary', set())
    if "*" in vary:
        state.set_message('header-vary', rs.VARY_ASTERISK)
        return # bail; nothing else to see here
    elif len(vary) > 3:
        state.set_message('header-vary', rs.VARY_COMPLEX,
            vary_count=f_num(len(vary))
        )
    else:
        if "user-agent" in vary:
            state.set_message('header-vary', rs.VARY_USER_AGENT)
        if "host" in vary:
            state.set_message('header-vary', rs.VARY_HOST)
        # TODO: enumerate the axes in a message

    # calculate age
    age_hdr = state.parsed_hdrs.get('age', 0)
    date_hdr = state.parsed_hdrs.get('date', 0)
    if date_hdr > 0:
        apparent_age = max(0, int(state.res_ts - date_hdr))
    else:
        apparent_age = 0
    current_age = max(apparent_age, age_hdr)
    current_age_str = relative_time(current_age, 0, 0)
    age_str = relative_time(age_hdr, 0, 0)
    state.age = age_hdr
    if age_hdr >= 1:
        state.set_message('header-age header-date', rs.CURRENT_AGE,
            age=age_str
        )

    # Check for clock skew and dateless origin server.
    skew = date_hdr - state.res_ts + age_hdr
    if not date_hdr:
        state.set_message('', rs.DATE_CLOCKLESS)
        if state.parsed_hdrs.has_key('expires') or \
           state.parsed_hdrs.has_key('last-modified'):
            state.set_message('header-expires header-last-modified',
                rs.DATE_CLOCKLESS_BAD_HDR
            )
    elif age_hdr > max_clock_skew and current_age - skew < max_clock_skew:
        state.set_message('header-date header-age', rs.AGE_PENALTY)
    elif abs(skew) > max_clock_skew:
        state.set_message('header-date', rs.DATE_INCORRECT,
            clock_skew_string=relative_time(skew, 0, 2)
        )
    else:
        state.set_message('header-date', rs.DATE_CORRECT)

    # calculate freshness
    freshness_lifetime = 0
    has_explicit_freshness = False
    has_cc_freshness = False
    freshness_hdrs = ['header-date']
    if 's-maxage' in cc_keys:
        # TODO: differentiate message for s-maxage
        freshness_lifetime = cc_dict['s-maxage']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif 'max-age' in cc_keys:
        freshness_lifetime = cc_dict['max-age']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif state.parsed_hdrs.has_key('expires'):
        has_explicit_freshness = True
        freshness_hdrs.append('header-expires')
        if state.parsed_hdrs.has_key('date'):
            freshness_lifetime = state.parsed_hdrs['expires'] - \
                state.parsed_hdrs['date']
        else:
            freshness_lifetime = state.parsed_hdrs['expires'] - \
                state.res_ts # ?

    freshness_left = freshness_lifetime - current_age
    freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
    freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)

    state.freshness_lifetime = freshness_lifetime
    fresh = freshness_left > 0

    if has_explicit_freshness:
        if fresh:
            state.set_message(" ".join(freshness_hdrs),
                rs.FRESHNESS_FRESH,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str
            )
        elif has_cc_freshness and state.age > freshness_lifetime:
            state.set_message(" ".join(freshness_hdrs),
                rs.FRESHNESS_STALE_CACHE,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str
            )
        else:
            state.set_message(" ".join(freshness_hdrs),
                rs.FRESHNESS_STALE_ALREADY,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str
            )
    # can heuristic freshness be used?
    elif state.res_status in heuristic_cacheable_status:
        state.set_message('header-last-modified', rs.FRESHNESS_HEURISTIC)
    else:
        state.set_message('', rs.FRESHNESS_NONE)

    # can stale responses be served?
    if 'must-revalidate' in cc_keys:
        if fresh:
            state.set_message('header-cache-control',
                rs.FRESH_MUST_REVALIDATE
            )
        elif has_explicit_freshness:
            state.set_message('header-cache-control',
                rs.STALE_MUST_REVALIDATE
            )
    elif 'proxy-revalidate' in cc_keys or 's-maxage' in cc_keys:
        if fresh:
            state.set_message('header-cache-control',
                rs.FRESH_PROXY_REVALIDATE
            )
        elif has_explicit_freshness:
            state.set_message('header-cache-control',
                rs.STALE_PROXY_REVALIDATE
            )
    else:
        if fresh:
            state.set_message('header-cache-control', rs.FRESH_SERVABLE)
        elif has_explicit_freshness:
            state.set_message('header-cache-control', rs.STALE_SERVABLE)

    # public?
    if 'public' in cc_keys:
        # TODO: check for authentication in request
        state.set_message('header-cache-control', rs.PUBLIC)
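# A hedged worked example (hypothetical timestamps and headers, not RED code)
# of the age and freshness arithmetic in checkCaching above; all values are
# in seconds.
def _freshness_example():
    res_ts = 1000000100.0        # when the response was received (our clock)
    date_hdr = 1000000000        # Date: 100 seconds behind our clock
    age_hdr = 30                 # Age: 30
    max_age = 300                # Cache-Control: max-age=300
    apparent_age = max(0, int(res_ts - date_hdr))   # 100
    current_age = max(apparent_age, age_hdr)        # 100
    freshness_left = max_age - current_age          # 200: still fresh
    return current_age, freshness_left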