Esempio n. 1
0
 def format_options(self, red):
     """Render the per-response option strip as HTML.

     Collects (label, tooltip) pairs -- size figures first, then action
     links -- with a None entry marking the break between the two groups.
     Each pair becomes a <span class='option'> whose title is the tooltip,
     the None entry becomes a <br>, and the pieces are joined with ``nl``
     and returned as a single string.
     """
     content_type = red.parsed_hdrs.get('content-type', [""])[0]
     header_bytes = f_num(red.client.input_header_length)
     body_bytes = f_num(red.res_body_len)
     entries = [
         (u"response headers: %s bytes" % header_bytes,
          "how large the response headers are, including the status line"),
         (u"body: %s bytes" % body_bytes,
          "how large the response body is"),
     ]

     # Bytes transferred beyond the body length are reported as overhead.
     overhead = red.client.input_transfer_length - red.res_body_len
     if overhead > 0:
         entries.append((
             u"transfer overhead: %s bytes" % f_num(overhead),
             "how much using chunked encoding adds to the response size"
         ))

     entries.append(None)  # rendered as a line break
     entries.append((
         u"<a href='#' id='body_view'>view body</a>",
         "View this response body (with any gzip compression removed)"
     ))

     # Address the HAR view by test id when one was supplied, else by URI.
     if self.kw.get('test_id', None):
         locator = "id=%s" % self.kw['test_id']
     else:
         locator = "uri=%s" % e_query_arg(red.uri)
     entries.append((
         u"<a href='?%s&format=har'>view har</a>" % locator,
         "View a HAR (HTTP ARchive, a JSON format) file for this response"
     ))

     # The remaining actions are only offered for results not yet saved.
     if not self.kw.get('is_saved', False):
         if self.kw.get('allow_save', False):
             entries.append((
                 u"<a href='#' id='save'>save</a>",
                 "Save these results for future reference"
             ))
         if content_type in self.validators:
             # The validator entry is itself a template taking the URI.
             validator_link = (u"<a href='%s'>validate body</a>"
                               % self.validators[content_type])
             entries.append((validator_link % e_query_arg(red.uri), ""))
         if red.link_count > 0:
             entries.append((
                 u"<a href='?descend=True&uri=%s'>check assets</a>"
                 % e_query_arg(red.uri),
                 "run RED on images, frames and embedded links"
             ))

     rendered = []
     for entry in entries:
         if entry:
             rendered.append(
                 "<span class='option' title='%s'>%s</span>"
                 % (entry[1], entry[0])
             )
         else:
             rendered.append("<br>")
     return nl.join(rendered)
Esempio n. 2
0
    def done(self):
        """Judge the response to the range request and report the outcome.

        A 206 response is compared with the response held by ``self.red``:
        both must agree on gzip content-encoding and on ETag, and the body
        must equal ``self.range_target``. The verdict is reported through
        ``self.red.setMessage`` and, where it can be decided, stored in
        ``self.red.partial_support``. Any other status is reported either
        as a full response (same status as ``self.red``) or as an
        unexpected status.
        """
        if self.res_status != '206':
            # TODO: address 416 directly
            if self.res_status == self.red.res_status:
                self.red.partial_support = False
                self.red.setMessage('header-accept-ranges', rs.RANGE_FULL)
            else:
                self.red.setMessage(
                    'header-accept-ranges',
                    rs.RANGE_STATUS,
                    range_status=self.res_status,
                    enc_range_status=e(self.res_status)
                )
            return

        # TODO: check entity headers
        # TODO: check content-range
        base_gzipped = 'gzip' in self.red.parsed_hdrs.get(
            'content-encoding', []
        )
        range_gzipped = 'gzip' in self.parsed_hdrs.get('content-encoding', [])
        if base_gzipped != range_gzipped:
            # Only one of the two responses is gzipped, so the bodies
            # cannot be compared.
            self.red.setMessage(
                'header-accept-ranges header-content-encoding',
                rs.RANGE_NEG_MISMATCH,
                self
            )
            return

        # The distinct defaults (1 vs 2) keep two missing ETags from
        # comparing equal.
        etags_match = (
            self.parsed_hdrs.get('etag', 1)
            == self.red.parsed_hdrs.get('etag', 2)
        )
        if not etags_match:
            self.red.setMessage(
                'header-accept-ranges',
                rs.RANGE_CHANGED,
                self
            )
            return

        if self.res_body == self.range_target:
            self.red.partial_support = True
            self.red.setMessage('header-accept-ranges',
                                rs.RANGE_CORRECT,
                                self)
            return

        # the body samples are just bags of bits
        self.red.partial_support = False
        requested_range = "bytes=%s-%s" % (self.range_start, self.range_end)
        expected = e(self.range_target.encode('string_escape'))
        expected_bytes = f_num(len(self.range_target))
        received = e(self.res_body.encode('string_escape'))
        received_bytes = f_num(self.res_body_len)
        self.red.setMessage(
            'header-accept-ranges',
            rs.RANGE_INCORRECT,
            self,
            range=requested_range,
            range_expected=expected,
            range_expected_bytes=expected_bytes,
            range_received=received,
            range_received_bytes=received_bytes
        )
Esempio n. 3
0
    def __init__(self, uri, method="GET", req_hdrs=None, req_body=None,
                status_cb=None, body_procs=None):
        """Set up the fetch and the metadata slots filled in by later checks.

        The request is passed to ``RedFetcher.__init__`` with an
        ``Accept-Encoding: gzip`` header appended to a copy of the caller's
        headers; the caller's own list is kept on
        ``self.state.orig_req_hdrs``. The URI is then checked against the
        ``absolute_URI`` pattern and the ``max_uri`` length limit, and a
        message is set on ``self.state`` for each check that fails.
        """
        if req_hdrs:
            orig_req_hdrs = req_hdrs
        else:
            orig_req_hdrs = []
        negotiated_hdrs = orig_req_hdrs + [('Accept-Encoding', 'gzip')]
        RedFetcher.__init__(self, uri, method, negotiated_hdrs, req_body,
                            status_cb, body_procs, req_type=method)

        # Extra metadata that the "main" RED will be adorned with
        self.state.orig_req_hdrs = orig_req_hdrs
        for unset_attr in ('age', 'store_shared', 'store_private',
                           'freshness_lifetime', 'stale_serveable',
                           'partial_support', 'inm_support', 'ims_support',
                           'gzip_support'):
            setattr(self.state, unset_attr, None)
        self.state.gzip_savings = 0

        # check the URI
        uri_pattern = "^\s*%s\s*$" % absolute_URI
        if re.match(uri_pattern, uri, re.VERBOSE) is None:
            self.state.setMessage('uri', rs.URI_BAD_SYNTAX)
        uri_length = len(uri)
        if uri_length > max_uri:
            self.state.setMessage('uri',
                                  rs.URI_TOO_LONG,
                                  uri_len=f_num(uri_length))
Esempio n. 4
0
    def _response_done(self, trailers):
        """Finish analysing the response once it has been fully received.

        Records completion time and transfer sizes on ``self.state``,
        finalises both body digests, runs ``checkCaching`` and -- unless
        the request was a HEAD or the status is 304 -- checks the body
        against the Content-Length and Content-MD5 headers. ``trailers``
        is not examined yet. Ends by calling ``self.done()`` and
        ``self.finish_task()``.
        """
        state = self.state
        exchange = self.exchange
        state.res_complete = True
        state.res_done_ts = thor.time()
        state.transfer_length = exchange.input_transfer_length
        state.header_length = exchange.input_header_length
        # TODO: check trailers
        if self.status_cb and state.type:
            self.status_cb("fetched %s (%s)" % (state.uri, state.type))
        state.res_body_md5 = self._md5_processor.digest()
        state.res_body_post_md5 = self._md5_post_processor.digest()
        checkCaching(state)

        body_expected = (state.method not in ['HEAD']
                         and state.res_status not in ['304'])
        if body_expected:
            # check payload basics
            hdrs = state.parsed_hdrs
            if 'content-length' in hdrs:
                if state.res_body_len == hdrs['content-length']:
                    state.setMessage('header-content-length', rs.CL_CORRECT)
                else:
                    state.setMessage(
                        'header-content-length',
                        rs.CL_INCORRECT,
                        body_length=f_num(state.res_body_len)
                    )
            if 'content-md5' in hdrs:
                # encodestring() appends a newline; strip it before comparing.
                calculated_md5 = base64.encodestring(state.res_body_md5)[:-1]
                if hdrs['content-md5'] == calculated_md5:
                    state.setMessage('header-content-md5', rs.CMD5_CORRECT)
                else:
                    state.setMessage('header-content-md5',
                                     rs.CMD5_INCORRECT,
                                     calc_md5=calculated_md5)
        self.done()
        self.finish_task()
Esempio n. 5
0
 def done(self):
     """Compare this response with ``self.base`` and report on gzip use.

     ``self.state`` is the response to this request; ``self.base`` is the
     response it is compared with (presumably the one negotiated with
     Accept-Encoding: gzip -- confirm against the caller). The method marks
     gzip as supported on ``self.base``, stores the percentage saved there,
     and sets messages about compression efficiency, a missing or
     inconsistent Vary header, gzip sent without being asked for, and an
     ETag that is identical for both responses.
     """
     # Percentage by which the base body is smaller than this one;
     # negative when the base body is actually larger.
     if self.state.res_body_len > 0:
         savings = int(100 *
             (
                 (float(self.state.res_body_len) - \
                 self.base.res_body_len
                 ) / self.state.res_body_len
             )
         )
     else:
         savings = 0
     self.base.gzip_support = True
     self.base.gzip_savings = savings
     if savings >= 0:
         self.setMessage('header-content-encoding',
             rs.CONNEG_GZIP_GOOD,
             savings=savings,
             orig_size=f_num(self.state.res_body_len),
             gzip_size=f_num(self.base.res_body_len)
         )
     else:
         self.setMessage('header-content-encoding',
             rs.CONNEG_GZIP_BAD,
             savings=abs(savings),
             orig_size=f_num(self.state.res_body_len),
             gzip_size=f_num(self.base.res_body_len)
         )
     vary_headers = self.base.parsed_hdrs.get('vary', [])
     if (not "accept-encoding" in vary_headers) \
     and (not "*" in vary_headers):
         # The subject used to be 'header-vary header-%s', which left a
         # literal, never-formatted '%s' in the subject string.
         self.setMessage('header-vary', rs.CONNEG_NO_VARY)
     # TODO: verify that the status/body/hdrs are the same;
     # if it's different, alert
     no_conneg_vary_headers = self.state.parsed_hdrs.get('vary', [])
     if 'gzip' in self.state.parsed_hdrs.get('content-encoding', []) or \
        'x-gzip' in self.state.parsed_hdrs.get('content-encoding', []):
         self.setMessage('header-vary header-content-encoding',
                              rs.CONNEG_GZIP_WITHOUT_ASKING)
     if no_conneg_vary_headers != vary_headers:
         self.setMessage('header-vary',
             rs.VARY_INCONSISTENT,
             conneg_vary=e(", ".join(vary_headers)),
             no_conneg_vary=e(", ".join(no_conneg_vary_headers))
         )
     # The distinct defaults (1 vs 2) keep two missing ETags from
     # comparing equal.
     if self.state.parsed_hdrs.get('etag', 1) \
        == self.base.parsed_hdrs.get('etag', 2):
         self.setMessage('header-etag', rs.ETAG_DOESNT_CHANGE)
Esempio n. 6
0
 def _response_body(self, chunk):
     "Process a chunk of the response body."
     state = self.state
     state.res_body_sample.append((state.res_body_len, chunk))
     if len(state.res_body_sample) > 4:
         state.res_body_sample.pop(0)
     self._md5_processor.update(chunk)
     state.res_body_len += len(chunk)
     if state.res_status == "206":
         # Store only partial responses completely, for error reporting
         state.res_body += chunk
         state.res_body_decode_len += len(chunk)
         # Don't actually try to make sense of a partial body...
         return
     content_codings = state.parsed_hdrs.get('content-encoding', [])
     content_codings.reverse()
     for coding in content_codings:
         # TODO: deflate support
         if coding == 'gzip' and self._gzip_ok:
             if not self._in_gzip_body:
                 self._gzip_header_buffer += chunk
                 try:
                     chunk = self._read_gzip_header(
                         self._gzip_header_buffer
                     )
                     self._in_gzip_body = True
                 except IndexError:
                     return # not a full header yet
                 except IOError, gzip_error:
                     state.setMessage('header-content-encoding',
                                     rs.BAD_GZIP,
                                     gzip_error=e(str(gzip_error))
                     )
                     self._gzip_ok = False
                     return
             try:
                 chunk = self._gzip_processor.decompress(chunk)
             except zlib.error, zlib_error:
                 state.setMessage(
                     'header-content-encoding', 
                     rs.BAD_ZLIB,
                     zlib_error=e(str(zlib_error)),
                     ok_zlib_len=f_num(state.res_body_sample[-1][0]),
                     chunk_sample=e(chunk[:20].encode('string_escape'))
                 )
                 self._gzip_ok = False
                 return
Esempio n. 7
0
    def _response_done(self, err):
        """Finish analysing the response, handling any transport error.

        Records completion time and transfer sizes on ``self.state``,
        drops the client reference, finalises both body digests and then
        interprets ``err`` (None, or a mapping with a 'desc' entry and an
        optional 'detail' entry). Some errors only set a message or adjust
        the body length; others mark the response as incomplete, which
        skips the caching and payload checks. An unrecognised error raises
        AssertionError. Ends by calling ``self.done()`` and
        ``self.finish_task()``.
        """
        state = self.state
        state.res_complete = True
        state.res_done_ts = nbhttp.now()
        state.transfer_length = self.client.input_transfer_length
        state.header_length = self.client.input_header_length
        self.client = None
        state.res_error = err
        if self.status_cb and state.type:
            self.status_cb("fetched %s (%s)" % (state.uri, state.type))
        state.res_body_md5 = self._md5_processor.digest()
        state.res_body_post_md5 = self._md5_post_processor.digest()

        if err is not None:
            desc = err['desc']
            if desc == nbhttp.error.ERR_BODY_FORBIDDEN['desc']:
                state.setMessage('header-none', rs.BODY_NOT_ALLOWED)
            elif desc == nbhttp.error.ERR_EXTRA_DATA['desc']:
                # Count the surplus bytes as part of the body length.
                state.res_body_len += len(err.get('detail', ''))
            elif desc == nbhttp.error.ERR_CHUNK['desc']:
                bad_chunk = err.get('detail', '')[:20]
                state.setMessage(
                    'header-transfer-encoding',
                    rs.BAD_CHUNK,
                    chunk_sample=e(bad_chunk.encode('string_escape'))
                )
            elif desc == nbhttp.error.ERR_CONNECT['desc']:
                state.res_complete = False
            elif desc == nbhttp.error.ERR_LEN_REQ['desc']:
                pass # TODO: length required
            elif desc == nbhttp.error.ERR_URL['desc']:
                state.res_complete = False
            elif desc == nbhttp.error.ERR_READ_TIMEOUT['desc']:
                state.res_complete = False
            elif desc == nbhttp.error.ERR_HTTP_VERSION['desc']:
                state.res_complete = False
            else:
                raise AssertionError, "Unknown response error: %s" % err

        if state.res_complete:
            checkCaching(state)

        body_expected = (state.res_complete
                         and state.method not in ['HEAD']
                         and state.res_status not in ['304'])
        if body_expected:
            # check payload basics
            hdrs = state.parsed_hdrs
            if 'content-length' in hdrs:
                if state.res_body_len == hdrs['content-length']:
                    state.setMessage('header-content-length', rs.CL_CORRECT)
                else:
                    state.setMessage(
                        'header-content-length',
                        rs.CL_INCORRECT,
                        body_length=f_num(state.res_body_len)
                    )
            if 'content-md5' in hdrs:
                # encodestring() appends a newline; strip it before comparing.
                calculated_md5 = base64.encodestring(state.res_body_md5)[:-1]
                if hdrs['content-md5'] == calculated_md5:
                    state.setMessage('header-content-md5', rs.CMD5_CORRECT)
                else:
                    state.setMessage('header-content-md5',
                                     rs.CMD5_INCORRECT,
                                     calc_md5=calculated_md5)
        self.done()
        self.finish_task()
Esempio n. 8
0
    def checkCaching(self):
        """Examine HTTP caching characteristics of the response.

        Reads ``self.method``, ``self.req_hdrs``, ``self.parsed_hdrs``,
        ``self.res_ts`` and ``self.res_status``; reports findings through
        ``self.setMessage`` and records ``store_shared``, ``store_private``,
        ``age`` and ``freshness_lifetime`` on ``self``. The checks run in
        order: Cache-Control directive hygiene, storability, no-cache,
        pre-check/post-check, Vary, age and clock skew, freshness, and
        whether stale responses may be served. Several findings
        (uncacheable method, no-store, no-cache, ``Vary: *``) end the
        analysis early.
        """
        # TODO: check URI for query string, message about HTTP/1.0 if so
        # known Cache-Control directives that don't allow duplicates
        known_cc = ["max-age", "no-store", "s-maxage", "public",
                    "private", "pre-check", "post-check",
                    "stale-while-revalidate", "stale-if-error",
        ]

        # cache-control is parsed as a sequence of (directive, value) pairs;
        # cc_list keeps duplicates, cc_dict keeps the last value seen.
        cc_set = self.parsed_hdrs.get('cache-control', [])
        cc_list = [k for (k,v) in cc_set]
        cc_dict = dict(cc_set)
        cc_keys = cc_dict.keys()

        # check for mis-capitalised directives /
        # assure there aren't any dup directives with different values
        for cc in cc_keys:
            if cc.lower() in known_cc and cc != cc.lower():
                self.setMessage('header-cache-control', rs.CC_MISCAP,
                    cc_lower = cc.lower(), cc=cc
                )
            if cc in known_cc and cc_list.count(cc) > 1:
                self.setMessage('header-cache-control', rs.CC_DUP,
                    cc=cc
                )

        # Who can store this?
        if self.method not in cacheable_methods:
            self.store_shared = self.store_private = False
            self.setMessage('method',
                            rs.METHOD_UNCACHEABLE,
                            method=self.method
            )
            return # bail; nothing else to see here
        elif 'no-store' in cc_keys:
            self.store_shared = self.store_private = False
            self.setMessage('header-cache-control', rs.NO_STORE)
            return # bail; nothing else to see here
        elif 'private' in cc_keys:
            self.store_shared = False
            self.store_private = True
            self.setMessage('header-cache-control', rs.PRIVATE_CC)
        elif 'authorization' in [k.lower() for k, v in self.req_hdrs] and \
          not 'public' in cc_keys:
            self.store_shared = False
            self.store_private = True
            self.setMessage('header-cache-control', rs.PRIVATE_AUTH)
        else:
            self.store_shared = self.store_private = True
            self.setMessage('header-cache-control', rs.STOREABLE)

        # no-cache?
        if 'no-cache' in cc_keys:
            if "last-modified" not in self.parsed_hdrs and \
               "etag" not in self.parsed_hdrs:
                self.setMessage('header-cache-control',
                                rs.NO_CACHE_NO_VALIDATOR
                )
            else:
                self.setMessage('header-cache-control', rs.NO_CACHE)
            return

        # pre-check / post-check
        if 'pre-check' in cc_keys or 'post-check' in cc_keys:
            # Both directives must be present. This test used to look for
            # 'post_check' (underscore), which never matched, so
            # CHECK_SINGLE was always reported and the comparison below
            # was unreachable.
            if 'pre-check' not in cc_keys or 'post-check' not in cc_keys:
                self.setMessage('header-cache-control', rs.CHECK_SINGLE)
            else:
                pre_check = post_check = None
                try:
                    pre_check = int(cc_dict['pre-check'])
                    post_check = int(cc_dict['post-check'])
                except (ValueError, TypeError):
                    # TypeError is caught as well in case a directive sent
                    # without a value is parsed to a non-string such as
                    # None (assumption -- confirm against the parser).
                    self.setMessage('header-cache-control',
                                    rs.CHECK_NOT_INTEGER
                    )
                if pre_check is not None and post_check is not None:
                    if pre_check == 0 and post_check == 0:
                        self.setMessage('header-cache-control',
                                        rs.CHECK_ALL_ZERO
                        )
                    elif post_check > pre_check:
                        self.setMessage('header-cache-control',
                                        rs.CHECK_POST_BIGGER
                        )
                    elif post_check == 0:
                        self.setMessage('header-cache-control',
                                        rs.CHECK_POST_ZERO
                        )
                    else:
                        self.setMessage('header-cache-control',
                                        rs.CHECK_POST_PRE,
                                        pre_check=pre_check,
                                        post_check=post_check
                        )

        # vary?
        vary = self.parsed_hdrs.get('vary', set())
        if "*" in vary:
            self.setMessage('header-vary', rs.VARY_ASTERISK)
            return # bail; nothing else to see here
        elif len(vary) > 3:
            self.setMessage('header-vary',
                            rs.VARY_COMPLEX,
                            vary_count=f_num(len(vary))
            )
        else:
            if "user-agent" in vary:
                self.setMessage('header-vary', rs.VARY_USER_AGENT)
            if "host" in vary:
                self.setMessage('header-vary', rs.VARY_HOST)
            # TODO: enumerate the axes in a message

        # calculate age
        age_hdr = self.parsed_hdrs.get('age', 0)
        date_hdr = self.parsed_hdrs.get('date', 0)
        if date_hdr > 0:
            apparent_age = max(0,
              int(self.res_ts - date_hdr))
        else:
            apparent_age = 0
        current_age = max(apparent_age, age_hdr)
        current_age_str = relative_time(current_age, 0, 0)
        age_str = relative_time(age_hdr, 0, 0)
        self.age = age_hdr
        if age_hdr >= 1:
            self.setMessage('header-age header-date', rs.CURRENT_AGE,
                            age=age_str)

        # Check for clock skew and dateless origin server.
        skew = date_hdr - self.res_ts + age_hdr
        if not date_hdr:
            self.setMessage('', rs.DATE_CLOCKLESS)
            if 'expires' in self.parsed_hdrs or \
              'last-modified' in self.parsed_hdrs:
                self.setMessage('header-expires header-last-modified',
                                rs.DATE_CLOCKLESS_BAD_HDR)
        elif age_hdr > max_clock_skew and current_age - skew < max_clock_skew:
            self.setMessage('header-date header-age', rs.AGE_PENALTY)
        elif abs(skew) > max_clock_skew:
            self.setMessage('header-date', rs.DATE_INCORRECT,
                           clock_skew_string=relative_time(skew, 0, 2)
            )
        else:
            self.setMessage('header-date', rs.DATE_CORRECT)

        # calculate freshness
        # Precedence: s-maxage, then max-age, then Expires.
        freshness_lifetime = 0
        has_explicit_freshness = False
        has_cc_freshness = False
        freshness_hdrs = ['header-date']
        if 's-maxage' in cc_keys: # TODO: differentiate message for s-maxage
            freshness_lifetime = cc_dict['s-maxage']
            freshness_hdrs.append('header-cache-control')
            has_explicit_freshness = True
            has_cc_freshness = True
        elif 'max-age' in cc_keys:
            freshness_lifetime = cc_dict['max-age']
            freshness_hdrs.append('header-cache-control')
            has_explicit_freshness = True
            has_cc_freshness = True
        elif 'expires' in self.parsed_hdrs:
            has_explicit_freshness = True
            freshness_hdrs.append('header-expires')
            if 'date' in self.parsed_hdrs:
                freshness_lifetime = self.parsed_hdrs['expires'] - \
                    self.parsed_hdrs['date']
            else:
                freshness_lifetime = self.parsed_hdrs['expires'] - \
                    self.res_ts # ?

        freshness_left = freshness_lifetime - current_age
        freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
        freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)

        self.freshness_lifetime = freshness_lifetime
        fresh = freshness_left > 0
        if has_explicit_freshness:
            if fresh:
                self.setMessage(" ".join(freshness_hdrs), rs.FRESHNESS_FRESH,
                                 freshness_lifetime=freshness_lifetime_str,
                                 freshness_left=freshness_left_str,
                                 current_age = current_age_str
                                 )
            elif has_cc_freshness and self.age > freshness_lifetime:
                self.setMessage(" ".join(freshness_hdrs),
                                rs.FRESHNESS_STALE_CACHE,
                                freshness_lifetime=freshness_lifetime_str,
                                freshness_left=freshness_left_str,
                                current_age = current_age_str
                )
            else:
                self.setMessage(" ".join(freshness_hdrs),
                                rs.FRESHNESS_STALE_ALREADY,
                                freshness_lifetime=freshness_lifetime_str,
                                freshness_left=freshness_left_str,
                                current_age = current_age_str
                )

        # can heuristic freshness be used?
        elif self.res_status in heuristic_cacheable_status:
            self.setMessage('header-last-modified', rs.FRESHNESS_HEURISTIC)
        else:
            self.setMessage('', rs.FRESHNESS_NONE)

        # can stale responses be served?
        if 'must-revalidate' in cc_keys:
            if fresh:
                self.setMessage('header-cache-control',
                                rs.FRESH_MUST_REVALIDATE
                )
            elif has_explicit_freshness:
                self.setMessage('header-cache-control',
                                rs.STALE_MUST_REVALIDATE
                )
        elif 'proxy-revalidate' in cc_keys or 's-maxage' in cc_keys:
            if fresh:
                self.setMessage('header-cache-control',
                                rs.FRESH_PROXY_REVALIDATE
                )
            elif has_explicit_freshness:
                self.setMessage('header-cache-control',
                                rs.STALE_PROXY_REVALIDATE
                )
        else:
            if fresh:
                self.setMessage('header-cache-control', rs.FRESH_SERVABLE)
            elif has_explicit_freshness:
                self.setMessage('header-cache-control', rs.STALE_SERVABLE)

        # public?
        if 'public' in cc_keys: # TODO: check for authentication in request
            self.setMessage('header-cache-control', rs.PUBLIC)
Esempio n. 9
0
    def done(self):
        """Compare this response with ``self.base`` to assess negotiation.

        ``self.state`` is the response to this request and ``self.base``
        the response it is compared with. If this response is itself
        gzip-encoded, that is reported and nothing else is checked.
        Otherwise the status, the Content-Type header, the Vary headers,
        the body digest and the ETag are compared in turn, and finally
        the size saved by compression is computed, stored on
        ``self.base`` and reported. A status or body mismatch stops the
        analysis at that point.
        """
        # see if it was compressed when not negotiated
        no_conneg_vary_headers = self.state.parsed_hdrs.get('vary', [])
        codings = self.state.parsed_hdrs.get('content-encoding', [])
        if 'gzip' in codings or 'x-gzip' in codings:
            self.setMessage('header-vary header-content-encoding',
                            rs.CONNEG_GZIP_WITHOUT_ASKING)
            return

        # Apparently, content negotiation is happening.

        # check status
        if self.base.res_status != self.state.res_status:
            self.setMessage(
                'status',
                rs.VARY_STATUS_MISMATCH,
                neg_status=self.base.res_status,
                noneg_status=self.state.res_status
            )
            return  # Can't be sure what's going on...

        # check headers that should be invariant
        for hdr in ['content-type']:
            negotiated_value = self.base.parsed_hdrs.get(hdr)
            plain_value = self.state.parsed_hdrs.get(hdr, None)
            if negotiated_value != plain_value:
                self.setMessage('header-%s' % hdr,
                                rs.VARY_HEADER_MISMATCH,
                                header=hdr)
                # TODO: expose on-the-wire values.

        # check Vary headers
        vary_headers = self.base.parsed_hdrs.get('vary', [])
        if not ("accept-encoding" in vary_headers or "*" in vary_headers):
            self.setMessage('header-vary', rs.CONNEG_NO_VARY)
        if no_conneg_vary_headers != vary_headers:
            self.setMessage(
                'header-vary',
                rs.VARY_INCONSISTENT,
                conneg_vary=e(", ".join(vary_headers)),
                no_conneg_vary=e(", ".join(no_conneg_vary_headers))
            )

        # check body
        if self.base.res_body_post_md5 != self.state.res_body_md5:
            self.setMessage('body', rs.VARY_BODY_MISMATCH)
            return  # Can't be sure what's going on...

        # check ETag
        # The distinct defaults (1 vs 2) keep two missing ETags from
        # comparing equal.
        plain_etag = self.state.parsed_hdrs.get('etag', 1)
        negotiated_etag = self.base.parsed_hdrs.get('etag', 2)
        if plain_etag == negotiated_etag:
            self.setMessage('header-etag', rs.VARY_ETAG_DOESNT_CHANGE)
            # TODO: weakness?

        # check compression efficiency
        plain_len = self.state.res_body_len
        if plain_len > 0:
            fraction_saved = (
                (float(plain_len) - self.base.res_body_len) / plain_len
            )
            savings = int(100 * fraction_saved)
        else:
            savings = 0
        self.base.gzip_support = True
        self.base.gzip_savings = savings

        # A negative figure means the compressed body is larger; it is
        # reported as a positive number under the "bad" message.
        if savings >= 0:
            verdict = rs.CONNEG_GZIP_GOOD
            reported_savings = savings
        else:
            verdict = rs.CONNEG_GZIP_BAD
            reported_savings = abs(savings)
        self.setMessage(
            'header-content-encoding',
            verdict,
            savings=reported_savings,
            orig_size=f_num(self.state.res_body_len),
            gzip_size=f_num(self.base.res_body_len)
        )