def format_options(self, red):
    """Return the things the user can do with the URI, as HTML links.

    Builds a list of ``(html, tooltip)`` pairs (size statistics first, then
    action links) and renders each as a ``<span class='option'>`` whose
    ``title`` is the tooltip.  A ``None`` entry renders as ``<br>``.  The
    rendered pieces are joined with the module-level ``nl`` separator.
    """
    content_type = red.parsed_hdrs.get('content-type', [""])[0]

    # Size statistics.
    entries = [
        (u"response headers: %s bytes" %
            f_num(red.client.input_header_length),
         "how large the response headers are, including the status line"),
        (u"body: %s bytes" % f_num(red.res_body_len),
         "how large the response body is"),
    ]
    overhead = red.client.input_transfer_length - red.res_body_len
    if overhead > 0:
        entries.append((
            u"transfer overhead: %s bytes" % f_num(overhead),
            "how much using chunked encoding adds to the response size"
        ))

    # Separator between the statistics and the action links.
    entries.append(None)

    entries.append((
        u"<a href='#' id='body_view'>view body</a>",
        "View this response body (with any gzip compression removed)"
    ))

    # A stored test is addressed by its id; anything else by its URI.
    test_id = self.kw.get('test_id', None)
    if test_id:
        har_locator = "id=%s" % test_id
    else:
        har_locator = "uri=%s" % e_query_arg(red.uri)
    entries.append((
        u"<a href='?%s&format=har'>view har</a>" % har_locator,
        "View a HAR (HTTP ARchive, a JSON format) file for this response"
    ))

    if not self.kw.get('is_saved', False):
        if self.kw.get('allow_save', False):
            entries.append((
                u"<a href='#' id='save'>save</a>",
                "Save these results for future reference"
            ))
        if content_type in self.validators:
            # The validator entry is itself a format string that takes the
            # query-escaped URI, so the link is filled in two steps.
            link_template = u"<a href='%s'>validate body</a>" % \
                self.validators[content_type]
            entries.append((link_template % e_query_arg(red.uri), ""))
        if red.link_count > 0:
            entries.append((
                u"<a href='?descend=True&uri=%s'>check assets</a>" %
                    e_query_arg(red.uri),
                "run RED on images, frames and embedded links"
            ))

    rendered = []
    for entry in entries:
        if entry:
            html, tooltip = entry
            rendered.append(
                "<span class='option' title='%s'>%s</span>" % (tooltip, html)
            )
        else:
            rendered.append("<br>")
    return nl.join(rendered)
def done(self):
    """Interpret the response to the ranged request and annotate ``self.red``.

    A 206 response is compared against the main response (content-encoding,
    ETag, then the body bytes against ``self.range_target``).  Any other
    status is reported either as "full response returned" (same status as
    the main response) or as an unexpected range status.
    """
    if self.res_status != '206':
        # TODO: address 416 directly
        if self.res_status == self.red.res_status:
            self.red.partial_support = False
            self.red.setMessage('header-accept-ranges', rs.RANGE_FULL)
        else:
            self.red.setMessage(
                'header-accept-ranges',
                rs.RANGE_STATUS,
                range_status=self.res_status,
                enc_range_status=e(self.res_status)
            )
        return

    # TODO: check entity headers
    # TODO: check content-range

    # The two responses must agree on whether gzip was applied; otherwise
    # the byte ranges are not comparable.
    main_gzipped = 'gzip' in self.red.parsed_hdrs.get('content-encoding', [])
    range_gzipped = 'gzip' in self.parsed_hdrs.get('content-encoding', [])
    if main_gzipped != range_gzipped:
        self.red.setMessage(
            'header-accept-ranges header-content-encoding',
            rs.RANGE_NEG_MISMATCH,
            self
        )
        return

    # Different defaults (1 vs 2) make two missing ETags compare unequal.
    if self.parsed_hdrs.get('etag', 1) != self.red.parsed_hdrs.get('etag', 2):
        self.red.setMessage('header-accept-ranges', rs.RANGE_CHANGED, self)
        return

    if self.res_body == self.range_target:
        self.red.partial_support = True
        self.red.setMessage('header-accept-ranges', rs.RANGE_CORRECT, self)
        return

    # the body samples are just bags of bits
    self.red.partial_support = False
    requested_range = "bytes=%s-%s" % (self.range_start, self.range_end)
    self.red.setMessage(
        'header-accept-ranges',
        rs.RANGE_INCORRECT,
        self,
        range=requested_range,
        range_expected=e(self.range_target.encode('string_escape')),
        range_expected_bytes=f_num(len(self.range_target)),
        range_received=e(self.res_body.encode('string_escape')),
        range_received_bytes=f_num(self.res_body_len)
    )
def __init__(self, uri, method="GET", req_hdrs=None, req_body=None,
             status_cb=None, body_procs=None):
    """Set up the main fetch for ``uri``.

    The caller's request headers are kept as-is in ``state.orig_req_hdrs``;
    the request that is actually sent additionally carries
    ``Accept-Encoding: gzip``.  After delegating to ``RedFetcher.__init__``
    the extra result metadata is initialised and the URI is checked for
    syntax and length.
    """
    caller_hdrs = req_hdrs or []
    sent_hdrs = caller_hdrs + [('Accept-Encoding', 'gzip')]
    RedFetcher.__init__(
        self, uri, method, sent_hdrs, req_body,
        status_cb, body_procs, req_type=method
    )

    # Extra metadata that the "main" RED will be adorned with
    state = self.state
    state.orig_req_hdrs = caller_hdrs
    for unknown_yet in ('age', 'store_shared', 'store_private',
                        'freshness_lifetime', 'stale_serveable',
                        'partial_support', 'inm_support', 'ims_support',
                        'gzip_support'):
        setattr(state, unknown_yet, None)
    state.gzip_savings = 0

    # check the URI
    uri_pattern = r"^\s*%s\s*$" % absolute_URI
    if re.match(uri_pattern, uri, re.VERBOSE) is None:
        state.setMessage('uri', rs.URI_BAD_SYNTAX)
    uri_length = len(uri)
    if uri_length > max_uri:
        state.setMessage('uri', rs.URI_TOO_LONG, uri_len=f_num(uri_length))
def _response_done(self, trailers):
    """Finish analysing the response once it has been received.

    Records completion time and transfer sizes, stores the body digests,
    runs the caching checks and, for responses that carry a payload (not
    HEAD, not 304), verifies Content-Length and Content-MD5.  Finally calls
    ``self.done()`` and ``self.finish_task()``.  ``trailers`` is currently
    unused.
    """
    st = self.state
    exchange = self.exchange
    st.res_complete = True
    st.res_done_ts = thor.time()
    st.transfer_length = exchange.input_transfer_length
    st.header_length = exchange.input_header_length
    # TODO: check trailers
    if self.status_cb and st.type:
        self.status_cb("fetched %s (%s)" % (st.uri, st.type))
    st.res_body_md5 = self._md5_processor.digest()
    st.res_body_post_md5 = self._md5_post_processor.digest()
    checkCaching(st)

    carries_payload = st.method != 'HEAD' and st.res_status != '304'
    if carries_payload:
        # check payload basics
        if 'content-length' in st.parsed_hdrs:
            if st.res_body_len == st.parsed_hdrs['content-length']:
                st.setMessage('header-content-length', rs.CL_CORRECT)
            else:
                st.setMessage(
                    'header-content-length',
                    rs.CL_INCORRECT,
                    body_length=f_num(st.res_body_len)
                )
        if 'content-md5' in st.parsed_hdrs:
            # encodestring() appends a newline; strip it before comparing.
            calculated = base64.encodestring(st.res_body_md5)[:-1]
            if st.parsed_hdrs['content-md5'] == calculated:
                st.setMessage('header-content-md5', rs.CMD5_CORRECT)
            else:
                st.setMessage(
                    'header-content-md5',
                    rs.CMD5_INCORRECT,
                    calc_md5=calculated
                )
    self.done()
    self.finish_task()
def done(self):
    """Compare the non-negotiated response with the gzip-negotiated one.

    ``self.state`` is this (non-negotiated) response and ``self.base`` the
    negotiated one.  Records the gzip savings on ``self.base`` and sets
    notes about compression efficiency, the Vary header, compression that
    was applied without being asked for, and an ETag that is identical for
    both representations.
    """
    plain_len = self.state.res_body_len
    gzip_len = self.base.res_body_len
    if plain_len > 0:
        savings = int(100 * ((float(plain_len) - gzip_len) / plain_len))
    else:
        # Avoid dividing by zero on an empty body.
        savings = 0
    self.base.gzip_support = True
    self.base.gzip_savings = savings
    if savings >= 0:
        self.setMessage(
            'header-content-encoding',
            rs.CONNEG_GZIP_GOOD,
            savings=savings,
            orig_size=f_num(plain_len),
            gzip_size=f_num(gzip_len)
        )
    else:
        self.setMessage(
            'header-content-encoding',
            rs.CONNEG_GZIP_BAD,
            savings=abs(savings),
            orig_size=f_num(plain_len),
            gzip_size=f_num(gzip_len)
        )

    vary_headers = self.base.parsed_hdrs.get('vary', [])
    if "accept-encoding" not in vary_headers and "*" not in vary_headers:
        # The subject used to be 'header-vary header-%s', which left a
        # never-filled format placeholder in it.
        self.setMessage('header-vary', rs.CONNEG_NO_VARY)

    # TODO: verify that the status/body/hdrs are the same;
    # if it's different, alert
    no_conneg_vary_headers = self.state.parsed_hdrs.get('vary', [])
    plain_codings = self.state.parsed_hdrs.get('content-encoding', [])
    if 'gzip' in plain_codings or 'x-gzip' in plain_codings:
        self.setMessage(
            'header-vary header-content-encoding',
            rs.CONNEG_GZIP_WITHOUT_ASKING
        )
    if no_conneg_vary_headers != vary_headers:
        self.setMessage(
            'header-vary',
            rs.VARY_INCONSISTENT,
            conneg_vary=e(", ".join(vary_headers)),
            no_conneg_vary=e(", ".join(no_conneg_vary_headers))
        )
    # Different defaults (1 vs 2) make two missing ETags compare unequal.
    if self.state.parsed_hdrs.get('etag', 1) \
       == self.base.parsed_hdrs.get('etag', 2):
        self.setMessage('header-etag', rs.ETAG_DOESNT_CHANGE)
def _response_body(self, chunk):
    """Process a chunk of the response body.

    Updates the running body length, the raw-body MD5 and the window of the
    most recent ``(offset, chunk)`` samples.  A 206 response is stored
    verbatim and not decoded.  Otherwise each ``gzip`` content-coding is
    undone; a decoding failure is reported with ``setMessage`` and disables
    further gzip processing for this response.
    """
    state = self.state

    # Keep at most the last four (offset, chunk) pairs for error reporting.
    state.res_body_sample.append((state.res_body_len, chunk))
    if len(state.res_body_sample) > 4:
        state.res_body_sample.pop(0)
    self._md5_processor.update(chunk)
    state.res_body_len += len(chunk)

    if state.res_status == "206":
        # Store only partial responses completely, for error reporting
        state.res_body += chunk
        state.res_body_decode_len += len(chunk)
        # Don't actually try to make sense of a partial body...
        return

    content_codings = state.parsed_hdrs.get('content-encoding', [])
    # Codings are walked last-to-first.  reversed() is used instead of
    # list.reverse() because the latter reversed the list stored in
    # parsed_hdrs in place, flipping the order again on every chunk.
    for coding in reversed(content_codings):
        # TODO: deflate support
        if coding == 'gzip' and self._gzip_ok:
            if not self._in_gzip_body:
                # Buffer until a complete gzip header has arrived.
                self._gzip_header_buffer += chunk
                try:
                    chunk = self._read_gzip_header(
                        self._gzip_header_buffer
                    )
                    self._in_gzip_body = True
                except IndexError:
                    return  # not a full header yet
                except IOError as gzip_error:
                    state.setMessage(
                        'header-content-encoding',
                        rs.BAD_GZIP,
                        gzip_error=e(str(gzip_error))
                    )
                    self._gzip_ok = False
                    return
            try:
                chunk = self._gzip_processor.decompress(chunk)
            except zlib.error as zlib_error:
                state.setMessage(
                    'header-content-encoding',
                    rs.BAD_ZLIB,
                    zlib_error=e(str(zlib_error)),
                    ok_zlib_len=f_num(state.res_body_sample[-1][0]),
                    chunk_sample=e(chunk[:20].encode('string_escape'))
                )
                self._gzip_ok = False
                return
    # NOTE(review): the decoded chunk is not consumed after the loop in this
    # version -- confirm whether post-decoding processing is missing.
def _response_done(self, err):
    """Finish analysing the response, handling any parse errors.

    ``err`` is ``None`` on success or an error mapping whose ``'desc'`` is
    matched against the known ``nbhttp.error`` descriptions.  Some errors
    only add a note, others mark the response as incomplete; an unknown
    error raises ``AssertionError``.  For a complete response the caching
    checks run and, unless it is a HEAD or 304, Content-Length and
    Content-MD5 are verified.  Finally calls ``self.done()`` and
    ``self.finish_task()``.
    """
    st = self.state
    st.res_complete = True
    st.res_done_ts = nbhttp.now()
    st.transfer_length = self.client.input_transfer_length
    st.header_length = self.client.input_header_length
    # The client is dropped once its counters have been read.
    self.client = None
    st.res_error = err
    if self.status_cb and st.type:
        self.status_cb("fetched %s (%s)" % (st.uri, st.type))
    st.res_body_md5 = self._md5_processor.digest()
    st.res_body_post_md5 = self._md5_post_processor.digest()

    if err is not None:
        desc = err['desc']
        errors = nbhttp.error
        if desc == errors.ERR_BODY_FORBIDDEN['desc']:
            st.setMessage('header-none', rs.BODY_NOT_ALLOWED)
        elif desc == errors.ERR_EXTRA_DATA['desc']:
            # Count the surplus bytes as part of the body.
            st.res_body_len += len(err.get('detail', ''))
        elif desc == errors.ERR_CHUNK['desc']:
            bad_chunk = err.get('detail', '')[:20]
            st.setMessage(
                'header-transfer-encoding',
                rs.BAD_CHUNK,
                chunk_sample=e(bad_chunk.encode('string_escape'))
            )
        elif desc == errors.ERR_CONNECT['desc']:
            st.res_complete = False
        elif desc == errors.ERR_LEN_REQ['desc']:
            pass  # TODO: length required
        elif desc == errors.ERR_URL['desc']:
            st.res_complete = False
        elif desc == errors.ERR_READ_TIMEOUT['desc']:
            st.res_complete = False
        elif desc == errors.ERR_HTTP_VERSION['desc']:
            st.res_complete = False
        else:
            raise AssertionError("Unknown response error: %s" % err)

    if st.res_complete:
        checkCaching(st)
        if st.method != 'HEAD' and st.res_status != '304':
            # check payload basics
            if 'content-length' in st.parsed_hdrs:
                if st.res_body_len == st.parsed_hdrs['content-length']:
                    st.setMessage('header-content-length', rs.CL_CORRECT)
                else:
                    st.setMessage(
                        'header-content-length',
                        rs.CL_INCORRECT,
                        body_length=f_num(st.res_body_len)
                    )
            if 'content-md5' in st.parsed_hdrs:
                # encodestring() appends a newline; strip it first.
                calculated = base64.encodestring(st.res_body_md5)[:-1]
                if st.parsed_hdrs['content-md5'] == calculated:
                    st.setMessage('header-content-md5', rs.CMD5_CORRECT)
                else:
                    st.setMessage(
                        'header-content-md5',
                        rs.CMD5_INCORRECT,
                        calc_md5=calculated
                    )
    self.done()
    self.finish_task()
def checkCaching(self):
    """Examine HTTP caching characteristics.

    ``self`` is the response state object (callers invoke this as
    ``checkCaching(state)``).  Notes are recorded through
    ``self.setMessage``; the function also sets ``self.store_shared``,
    ``self.store_private``, ``self.age`` and ``self.freshness_lifetime``.

    The checks run in this order and several of them end the analysis
    early: Cache-Control directive hygiene, storability (uncacheable method
    and ``no-store`` bail out), ``no-cache`` (bails out), the
    ``pre-check``/``post-check`` pair, Vary (``*`` bails out), age and clock
    skew, freshness, whether stale responses may be served, and ``public``.
    """
    # TODO: check URI for query string, message about HTTP/1.0 if so
    # known Cache-Control directives that don't allow duplicates
    known_cc = ["max-age", "no-store", "s-maxage", "public", "private",
                "pre-check", "post-check", "stale-while-revalidate",
                "stale-if-error"]

    # cc_set is an iterable of (directive, value) pairs.
    cc_set = self.parsed_hdrs.get('cache-control', [])
    cc_list = [k for (k, v) in cc_set]
    cc_dict = dict(cc_set)
    cc_keys = cc_dict.keys()

    # check for mis-capitalised directives /
    # assure there aren't any dup directives with different values
    for cc in cc_keys:
        if cc.lower() in known_cc and cc != cc.lower():
            self.setMessage('header-cache-control', rs.CC_MISCAP,
                            cc_lower=cc.lower(), cc=cc)
        if cc in known_cc and cc_list.count(cc) > 1:
            self.setMessage('header-cache-control', rs.CC_DUP, cc=cc)

    # Who can store this?
    if self.method not in cacheable_methods:
        self.store_shared = self.store_private = False
        self.setMessage('method', rs.METHOD_UNCACHEABLE, method=self.method)
        return  # bail; nothing else to see here
    elif 'no-store' in cc_keys:
        self.store_shared = self.store_private = False
        self.setMessage('header-cache-control', rs.NO_STORE)
        return  # bail; nothing else to see here
    elif 'private' in cc_keys:
        self.store_shared = False
        self.store_private = True
        self.setMessage('header-cache-control', rs.PRIVATE_CC)
    elif 'authorization' in [k.lower() for k, v in self.req_hdrs] and \
            'public' not in cc_keys:
        self.store_shared = False
        self.store_private = True
        self.setMessage('header-cache-control', rs.PRIVATE_AUTH)
    else:
        self.store_shared = self.store_private = True
        self.setMessage('header-cache-control', rs.STOREABLE)

    # no-cache?
    if 'no-cache' in cc_keys:
        if "last-modified" not in self.parsed_hdrs and \
                "etag" not in self.parsed_hdrs:
            self.setMessage('header-cache-control', rs.NO_CACHE_NO_VALIDATOR)
        else:
            self.setMessage('header-cache-control', rs.NO_CACHE)
        return

    # pre-check / post-check
    if 'pre-check' in cc_keys or 'post-check' in cc_keys:
        # The second test used to look for 'post_check' (underscore), which
        # never matches, so a correctly paired header was always reported
        # as CHECK_SINGLE.
        if 'pre-check' not in cc_keys or 'post-check' not in cc_keys:
            self.setMessage('header-cache-control', rs.CHECK_SINGLE)
        else:
            pre_check = post_check = None
            try:
                pre_check = int(cc_dict['pre-check'])
                post_check = int(cc_dict['post-check'])
            except (ValueError, TypeError):
                # TypeError covers a directive whose parsed value is not a
                # string or number -- presumably None when it is given
                # without a value; confirm against the header parser.
                self.setMessage('header-cache-control', rs.CHECK_NOT_INTEGER)
            if pre_check is not None and post_check is not None:
                if pre_check == 0 and post_check == 0:
                    self.setMessage('header-cache-control', rs.CHECK_ALL_ZERO)
                elif post_check > pre_check:
                    self.setMessage('header-cache-control',
                                    rs.CHECK_POST_BIGGER)
                elif post_check == 0:
                    self.setMessage('header-cache-control',
                                    rs.CHECK_POST_ZERO)
                else:
                    self.setMessage('header-cache-control',
                                    rs.CHECK_POST_PRE,
                                    pre_check=pre_check,
                                    post_check=post_check)

    # vary?
    vary = self.parsed_hdrs.get('vary', set())
    if "*" in vary:
        self.setMessage('header-vary', rs.VARY_ASTERISK)
        return  # bail; nothing else to see here
    elif len(vary) > 3:
        self.setMessage('header-vary', rs.VARY_COMPLEX,
                        vary_count=f_num(len(vary)))
    else:
        if "user-agent" in vary:
            self.setMessage('header-vary', rs.VARY_USER_AGENT)
        if "host" in vary:
            self.setMessage('header-vary', rs.VARY_HOST)
        # TODO: enumerate the axes in a message

    # calculate age
    age_hdr = self.parsed_hdrs.get('age', 0)
    date_hdr = self.parsed_hdrs.get('date', 0)
    if date_hdr > 0:
        apparent_age = max(0, int(self.res_ts - date_hdr))
    else:
        apparent_age = 0
    current_age = max(apparent_age, age_hdr)
    current_age_str = relative_time(current_age, 0, 0)
    age_str = relative_time(age_hdr, 0, 0)
    self.age = age_hdr
    if age_hdr >= 1:
        self.setMessage('header-age header-date', rs.CURRENT_AGE,
                        age=age_str)

    # Check for clock skew and dateless origin server.
    skew = date_hdr - self.res_ts + age_hdr
    if not date_hdr:
        self.setMessage('', rs.DATE_CLOCKLESS)
        if 'expires' in self.parsed_hdrs or \
                'last-modified' in self.parsed_hdrs:
            self.setMessage('header-expires header-last-modified',
                            rs.DATE_CLOCKLESS_BAD_HDR)
    elif age_hdr > max_clock_skew and current_age - skew < max_clock_skew:
        self.setMessage('header-date header-age', rs.AGE_PENALTY)
    elif abs(skew) > max_clock_skew:
        self.setMessage('header-date', rs.DATE_INCORRECT,
                        clock_skew_string=relative_time(skew, 0, 2))
    else:
        self.setMessage('header-date', rs.DATE_CORRECT)

    # calculate freshness (s-maxage wins over max-age, which wins over
    # Expires)
    freshness_lifetime = 0
    has_explicit_freshness = False
    has_cc_freshness = False
    freshness_hdrs = ['header-date']
    if 's-maxage' in cc_keys:  # TODO: differentiate message for s-maxage
        freshness_lifetime = cc_dict['s-maxage']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif 'max-age' in cc_keys:
        freshness_lifetime = cc_dict['max-age']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif 'expires' in self.parsed_hdrs:
        has_explicit_freshness = True
        freshness_hdrs.append('header-expires')
        if 'date' in self.parsed_hdrs:
            freshness_lifetime = self.parsed_hdrs['expires'] - \
                self.parsed_hdrs['date']
        else:
            freshness_lifetime = self.parsed_hdrs['expires'] - \
                self.res_ts  # ?

    freshness_left = freshness_lifetime - current_age
    freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
    freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)

    self.freshness_lifetime = freshness_lifetime
    fresh = freshness_left > 0
    if has_explicit_freshness:
        if fresh:
            self.setMessage(" ".join(freshness_hdrs), rs.FRESHNESS_FRESH,
                            freshness_lifetime=freshness_lifetime_str,
                            freshness_left=freshness_left_str,
                            current_age=current_age_str)
        elif has_cc_freshness and self.age > freshness_lifetime:
            self.setMessage(" ".join(freshness_hdrs),
                            rs.FRESHNESS_STALE_CACHE,
                            freshness_lifetime=freshness_lifetime_str,
                            freshness_left=freshness_left_str,
                            current_age=current_age_str)
        else:
            self.setMessage(" ".join(freshness_hdrs),
                            rs.FRESHNESS_STALE_ALREADY,
                            freshness_lifetime=freshness_lifetime_str,
                            freshness_left=freshness_left_str,
                            current_age=current_age_str)

    # can heuristic freshness be used?
    elif self.res_status in heuristic_cacheable_status:
        self.setMessage('header-last-modified', rs.FRESHNESS_HEURISTIC)
    else:
        self.setMessage('', rs.FRESHNESS_NONE)

    # can stale responses be served?
    if 'must-revalidate' in cc_keys:
        if fresh:
            self.setMessage('header-cache-control',
                            rs.FRESH_MUST_REVALIDATE)
        elif has_explicit_freshness:
            self.setMessage('header-cache-control',
                            rs.STALE_MUST_REVALIDATE)
    elif 'proxy-revalidate' in cc_keys or 's-maxage' in cc_keys:
        if fresh:
            self.setMessage('header-cache-control',
                            rs.FRESH_PROXY_REVALIDATE)
        elif has_explicit_freshness:
            self.setMessage('header-cache-control',
                            rs.STALE_PROXY_REVALIDATE)
    else:
        if fresh:
            self.setMessage('header-cache-control', rs.FRESH_SERVABLE)
        elif has_explicit_freshness:
            self.setMessage('header-cache-control', rs.STALE_SERVABLE)

    # public?
    if 'public' in cc_keys:  # TODO: check for authentication in request
        self.setMessage('header-cache-control', rs.PUBLIC)
def done(self):
    """Compare the non-negotiated response with the gzip-negotiated one.

    ``self.state`` is this (non-negotiated) response and ``self.base`` the
    negotiated one.  If this response was compressed without being asked
    for, only that is reported.  Otherwise status, invariant headers, Vary,
    body digest and ETag are compared, and the gzip savings are recorded on
    ``self.base``.  A status or body mismatch ends the comparison early.
    """
    plain = self.state
    negotiated = self.base

    # see if it was compressed when not negotiated
    no_conneg_vary_headers = plain.parsed_hdrs.get('vary', [])
    plain_codings = plain.parsed_hdrs.get('content-encoding', [])
    if 'gzip' in plain_codings or 'x-gzip' in plain_codings:
        self.setMessage(
            'header-vary header-content-encoding',
            rs.CONNEG_GZIP_WITHOUT_ASKING
        )
        return

    # Apparently, content negotiation is happening.

    # check status
    if negotiated.res_status != plain.res_status:
        self.setMessage(
            'status',
            rs.VARY_STATUS_MISMATCH,
            neg_status=negotiated.res_status,
            noneg_status=plain.res_status
        )
        return  # Can't be sure what's going on...

    # check headers that should be invariant
    for hdr in ('content-type',):
        negotiated_value = negotiated.parsed_hdrs.get(hdr)
        plain_value = plain.parsed_hdrs.get(hdr, None)
        if negotiated_value != plain_value:
            self.setMessage(
                'header-%s' % hdr,
                rs.VARY_HEADER_MISMATCH,
                header=hdr
            )
            # TODO: expose on-the-wire values.

    # check Vary headers
    vary_headers = negotiated.parsed_hdrs.get('vary', [])
    if "accept-encoding" not in vary_headers and "*" not in vary_headers:
        self.setMessage('header-vary', rs.CONNEG_NO_VARY)
    if no_conneg_vary_headers != vary_headers:
        self.setMessage(
            'header-vary',
            rs.VARY_INCONSISTENT,
            conneg_vary=e(", ".join(vary_headers)),
            no_conneg_vary=e(", ".join(no_conneg_vary_headers))
        )

    # check body: the negotiated response's post-processing digest is
    # compared with this response's raw digest
    if negotiated.res_body_post_md5 != plain.res_body_md5:
        self.setMessage('body', rs.VARY_BODY_MISMATCH)
        return  # Can't be sure what's going on...

    # check ETag; different defaults (1 vs 2) make two missing ETags
    # compare unequal
    if plain.parsed_hdrs.get('etag', 1) == negotiated.parsed_hdrs.get('etag', 2):
        self.setMessage('header-etag', rs.VARY_ETAG_DOESNT_CHANGE)
        # TODO: weakness?

    # check compression efficiency
    plain_len = plain.res_body_len
    if plain_len > 0:
        savings = int(
            100 * ((float(plain_len) - negotiated.res_body_len) / plain_len)
        )
    else:
        savings = 0
    negotiated.gzip_support = True
    negotiated.gzip_savings = savings
    if savings >= 0:
        verdict, reported = rs.CONNEG_GZIP_GOOD, savings
    else:
        verdict, reported = rs.CONNEG_GZIP_BAD, abs(savings)
    self.setMessage(
        'header-content-encoding',
        verdict,
        savings=reported,
        orig_size=f_num(plain.res_body_len),
        gzip_size=f_num(negotiated.res_body_len)
    )