Exemple #1
0
 def format_options(self, red):
     """Return things that the user can do with the URI as HTML links.

     Builds a list of (markup, tooltip) pairs describing the sizes of the
     response in `red` and the actions offered for it, then renders every
     pair as a <span class='option'> element whose title attribute is the
     tooltip. A None entry in the list is rendered as "<br>". The rendered
     pieces are joined with `nl` and returned as one string.
     """
     options = []
     media_type = red.parsed_hdrs.get('content-type', [""])[0]

     # Size information.
     options.append((
         u"response headers: %s bytes" % f_num(red.header_length),
         "how large the response headers are, including the status line"
     ))
     options.append((
         u"body: %s bytes" % f_num(red.res_body_len),
         "how large the response body is"
     ))
     transfer_overhead = red.transfer_length - red.res_body_len
     if transfer_overhead > 0:
         options.append((
             u"transfer overhead: %s bytes" % f_num(transfer_overhead),
             "how much using chunked encoding adds to the response size"
         ))

     # None marks a line break between the sizes and the actions.
     options.append(None)
     options.append((u"""\
 <a href='#' id='body_view' accesskey='b'>view body</a>""",
         "View this response body (with any gzip compression removed)"
     ))

     # Locate the HAR view by test id when one was supplied, otherwise by
     # whatever query string self.req_qs() builds for the URI.
     if self.kw.get('test_id', None):
         har_locator = "id=%s" % self.kw['test_id']
     else:
         har_locator = self.req_qs(red.uri)
     options.append(
         (u"""\
 <a href='?%s&format=har' accesskey='h'>view har</a>""" % har_locator,
         "View a HAR (HTTP ARchive, a JSON format) file for this response"
     ))

     # These actions are only offered for results that are not saved.
     if not self.kw.get('is_saved', False):
         if self.kw.get('allow_save', False):
             options.append((
                 u"<a href='#' id='save' accesskey='s'>save</a>",
                 "Save these results for future reference"
             ))
         if media_type in self.validators:
             # The validator entry is itself a format string that takes
             # the encoded URI, hence the two chained % operations.
             options.append((
                 u"<a href='%s' accesskey='v'>validate body</a>" %
                     self.validators[media_type] %
                     e_query_arg(red.uri),
                 ""
             ))
         if hasattr(red, "link_count") and red.link_count > 0:
             options.append((
                 u"<a href='?descend=True&%s' accesskey='a'>"
                 u"check embedded</a>" % self.req_qs(red.uri),
                 "run RED on images, frames and embedded links"
             ))

     rendered = []
     for option in options:
         if option:
             rendered.append(
                 "<span class='option' title='%s'>%s</span>" % (
                     option[1], option[0]
                 )
             )
         else:
             rendered.append("<br>")
     return nl.join(rendered)
Exemple #2
0
    def done(self):
        """Evaluate the response to the Range subrequest and record a verdict.

        If the subrequest did not complete, only that problem is reported.
        For a 206 response the content-encoding is compared with the base
        response, missing headers are checked (unless the base request
        carried If-Range) and, when both ETags are equal, the received body
        is compared with self.range_target; self.base.partial_support is
        set from that comparison. Any other status is reported with
        rs.RANGE_FULL when it equals the base response's status, and with
        rs.RANGE_STATUS otherwise.
        """
        sub = self.state
        main = self.base

        if not sub.res_complete:
            self.set_message(
                '', rs.RANGE_SUBREQ_PROBLEM, problem=sub.res_error.desc
            )
            return

        if sub.res_status != '206':
            # TODO: address 416 directly
            if sub.res_status == main.res_status:
                main.partial_support = False
                self.set_message('header-accept-ranges', rs.RANGE_FULL)
            else:
                self.set_message(
                    'header-accept-ranges',
                    rs.RANGE_STATUS,
                    range_status=sub.res_status,
                    enc_range_status=sub.res_status or '(unknown)'
                )
            return

        # Exactly one of the two responses being gzipped is a mismatch.
        ce = 'content-encoding'
        main_gzipped = 'gzip' in main.parsed_hdrs.get(ce, [])
        sub_gzipped = 'gzip' in sub.parsed_hdrs.get(ce, [])
        if main_gzipped != sub_gzipped:
            self.set_message(
                'header-accept-ranges header-content-encoding',
                rs.RANGE_NEG_MISMATCH
            )
            return

        if_range_hdrs = [
            hdr for hdr in main.orig_req_hdrs if hdr[0].lower() == 'if-range'
        ]
        if not if_range_hdrs:
            self.check_missing_hdrs(
                [
                    'date', 'cache-control', 'content-location', 'etag',
                    'expires', 'vary'
                ],
                rs.MISSING_HDRS_206,
                'Range'
            )

        # The defaults differ (1 vs. 2) so that two absent ETags never
        # compare as equal.
        same_etag = (
            sub.parsed_hdrs.get('etag', 1) == main.parsed_hdrs.get('etag', 2)
        )
        if not same_etag:
            self.set_message('header-accept-ranges', rs.RANGE_CHANGED)
            return

        if sub.res_body == self.range_target:
            main.partial_support = True
            self.set_message('header-accept-ranges', rs.RANGE_CORRECT)
            return

        # the body samples are just bags of bits
        main.partial_support = False
        self.set_message(
            'header-accept-ranges',
            rs.RANGE_INCORRECT,
            range="bytes=%s-%s" % (self.range_start, self.range_end),
            range_expected=self.range_target.encode('string_escape'),
            range_expected_bytes=f_num(len(self.range_target)),
            range_received=sub.res_body.encode('string_escape'),
            range_received_bytes=f_num(sub.res_body_len)
        )
Exemple #3
0
    def done(self):
        """Judge the Range subrequest's response and record the outcome.

        An incomplete subrequest is reported on its own. A 206 response is
        checked for a gzip content-encoding mismatch against the base
        response, for missing headers (skipped when the base request had an
        If-Range header), and - if the ETags are equal - for a body equal to
        self.range_target, which decides self.base.partial_support. A
        non-206 status yields rs.RANGE_FULL when it matches the base
        response's status and rs.RANGE_STATUS otherwise.
        """
        if not self.state.res_complete:
            problem_desc = self.state.res_error.desc
            self.set_message('', rs.RANGE_SUBREQ_PROBLEM, problem=problem_desc)
            return

        range_status = self.state.res_status
        if range_status == '206':
            coding_hdr = 'content-encoding'
            base_has_gzip = (
                'gzip' in self.base.parsed_hdrs.get(coding_hdr, [])
            )
            range_has_gzip = (
                'gzip' in self.state.parsed_hdrs.get(coding_hdr, [])
            )
            if base_has_gzip != range_has_gzip:
                # only one of the two responses was gzipped
                self.set_message(
                    'header-accept-ranges header-content-encoding',
                    rs.RANGE_NEG_MISMATCH
                )
                return

            if_range_seen = [
                True for hdr in self.base.orig_req_hdrs
                if hdr[0].lower() == 'if-range'
            ]
            if len(if_range_seen) == 0:
                expected_hdrs = [
                    'date', 'cache-control', 'content-location', 'etag',
                    'expires', 'vary'
                ]
                self.check_missing_hdrs(
                    expected_hdrs, rs.MISSING_HDRS_206, 'Range'
                )

            # distinct defaults keep two missing ETags from matching
            range_etag = self.state.parsed_hdrs.get('etag', 1)
            base_etag = self.base.parsed_hdrs.get('etag', 2)
            if range_etag == base_etag:
                if self.state.res_body == self.range_target:
                    self.base.partial_support = True
                    self.set_message('header-accept-ranges', rs.RANGE_CORRECT)
                else:
                    # the body samples are just bags of bits
                    self.base.partial_support = False
                    requested = "bytes=%s-%s" % (
                        self.range_start, self.range_end
                    )
                    self.set_message(
                        'header-accept-ranges',
                        rs.RANGE_INCORRECT,
                        range=requested,
                        range_expected=self.range_target.encode(
                            'string_escape'
                        ),
                        range_expected_bytes=f_num(len(self.range_target)),
                        range_received=self.state.res_body.encode(
                            'string_escape'
                        ),
                        range_received_bytes=f_num(self.state.res_body_len)
                    )
            else:
                self.set_message('header-accept-ranges', rs.RANGE_CHANGED)

        # TODO: address 416 directly
        elif range_status == self.base.res_status:
            self.base.partial_support = False
            self.set_message('header-accept-ranges', rs.RANGE_FULL)
        else:
            self.set_message(
                'header-accept-ranges',
                rs.RANGE_STATUS,
                range_status=range_status,
                enc_range_status=range_status or '(unknown)'
            )
Exemple #4
0
    def __init__(self, uri, method="GET", req_hdrs=None, req_body=None,
                status_cb=None, body_procs=None):
        """Set up the fetch for `uri` and do basic checks on the URI itself.

        The caller's request headers are passed to RedFetcher.__init__ with
        an added "Accept-Encoding: gzip" header; the headers as originally
        given (or an empty list) are kept in self.state.orig_req_hdrs. The
        remaining arguments are handed to RedFetcher.__init__ unchanged,
        with `method` also used as the request type.

        A message is recorded on self.state if the URI does not match
        `absolute_URI` or is longer than `max_uri`.
        """
        orig_req_hdrs = req_hdrs or []
        # Concatenation builds a new list, so the caller's list is not
        # modified by adding Accept-Encoding.
        rh = orig_req_hdrs + [('Accept-Encoding', 'gzip')]
        RedFetcher.__init__(self, uri, method, rh, req_body,
                            status_cb, body_procs, req_type=method)

        # Extra metadata that the "main" RED will be adorned with
        self.state.orig_req_hdrs = orig_req_hdrs
        self.state.age = None
        self.state.store_shared = None
        self.state.store_private = None
        self.state.freshness_lifetime = None
        self.state.stale_serveable = None
        self.state.partial_support = None
        self.state.inm_support = None
        self.state.ims_support = None
        self.state.gzip_support = None
        self.state.gzip_savings = 0

        # check the URI
        # Raw string: "\s" must reach the regex engine untouched rather
        # than rely on Python leaving an unknown escape alone.
        if not re.match(r"^\s*%s\s*$" % absolute_URI, uri, re.VERBOSE):
            self.state.set_message('uri', rs.URI_BAD_SYNTAX)
        if len(uri) > max_uri:
            self.state.set_message(
                'uri',
                rs.URI_TOO_LONG,
                uri_len=f_num(len(uri))
            )
Exemple #5
0
    def _response_done(self, trailers):
        """Finish analysing the response once it is complete.

        Marks the response complete, records the completion time and the
        transfer/header lengths from self.exchange, stores both MD5
        digests, runs checkCaching() and - except for HEAD requests and 304
        responses - verifies the Content-Length and Content-MD5 headers
        against the body that was received. Finally calls self.done() and
        self.finish_task().

        `trailers` is currently not examined.
        """
        self._st.append('_response_done()')
        state = self.state
        state.res_complete = True
        state.res_done_ts = thor.time()
        state.transfer_length = self.exchange.input_transfer_length
        state.header_length = self.exchange.input_header_length
        # TODO: check trailers
        if self.status_cb and state.type:
            self.status_cb("fetched %s (%s)" % (state.uri, state.type))
        state.res_body_md5 = self._md5_processor.digest()
        state.res_body_post_md5 = self._md5_post_processor.digest()
        checkCaching(state)

        if state.method != 'HEAD' and state.res_status != '304':
            # check payload basics
            if 'content-length' in state.parsed_hdrs:
                if state.res_body_len == state.parsed_hdrs['content-length']:
                    state.set_message('header-content-length', rs.CL_CORRECT)
                else:
                    state.set_message(
                        'header-content-length',
                        rs.CL_INCORRECT,
                        body_length=f_num(state.res_body_len)
                    )
            if 'content-md5' in state.parsed_hdrs:
                # b64encode() adds no trailing newline, so nothing has to
                # be sliced off the encoded digest.
                c_md5_calc = base64.b64encode(state.res_body_md5)
                if state.parsed_hdrs['content-md5'] == c_md5_calc:
                    state.set_message('header-content-md5', rs.CMD5_CORRECT)
                else:
                    state.set_message(
                        'header-content-md5',
                        rs.CMD5_INCORRECT,
                        calc_md5=c_md5_calc
                    )
        self.done()
        self.finish_task()
Exemple #6
0
    def _response_done(self, trailers):
        """Finish analysing the response once it is complete.

        Flags the response as complete, stores the completion timestamp,
        the transfer and header lengths reported by self.exchange and the
        two body MD5 digests, then runs checkCaching(). Unless the request
        method is HEAD or the status is 304, the Content-Length and
        Content-MD5 headers (when present) are compared with the received
        body and a message is recorded for each. self.done() and
        self.finish_task() are called last.

        `trailers` is currently not examined.
        """
        self._st.append('_response_done()')
        state = self.state
        state.res_complete = True
        state.res_done_ts = thor.time()
        state.transfer_length = self.exchange.input_transfer_length
        state.header_length = self.exchange.input_header_length
        # TODO: check trailers
        if self.status_cb and state.type:
            self.status_cb("fetched %s (%s)" % (state.uri, state.type))
        state.res_body_md5 = self._md5_processor.digest()
        state.res_body_post_md5 = self._md5_post_processor.digest()
        checkCaching(state)

        if state.method != 'HEAD' and state.res_status != '304':
            # check payload basics
            if 'content-length' in state.parsed_hdrs:
                if state.res_body_len == state.parsed_hdrs['content-length']:
                    state.set_message('header-content-length', rs.CL_CORRECT)
                else:
                    state.set_message('header-content-length',
                                      rs.CL_INCORRECT,
                                      body_length=f_num(state.res_body_len))
            if 'content-md5' in state.parsed_hdrs:
                # b64encode() emits no trailing newline, unlike
                # encodestring(), so no slicing is needed.
                c_md5_calc = base64.b64encode(state.res_body_md5)
                if state.parsed_hdrs['content-md5'] == c_md5_calc:
                    state.set_message('header-content-md5', rs.CMD5_CORRECT)
                else:
                    state.set_message('header-content-md5',
                                      rs.CMD5_INCORRECT,
                                      calc_md5=c_md5_calc)
        self.done()
        self.finish_task()
Exemple #7
0
    def done(self):
        """Evaluate the response to the Range subrequest and record a verdict.

        For a 206 response, a gzip content-encoding mismatch with the base
        response is reported first; otherwise, when both ETags are equal,
        the received body is compared with self.range_target and
        self.base.partial_support is set from the result. Any other status
        is reported with rs.RANGE_FULL when it equals the base response's
        status, and with rs.RANGE_STATUS otherwise.
        """
        sub = self.state
        main = self.base

        if sub.res_status != '206':
            # TODO: address 416 directly
            if sub.res_status == main.res_status:
                main.partial_support = False
                self.set_message('header-accept-ranges', rs.RANGE_FULL)
            else:
                self.set_message(
                    'header-accept-ranges',
                    rs.RANGE_STATUS,
                    range_status=sub.res_status,
                    enc_range_status=e(sub.res_status or '(unknown)')
                )
            return

        # TODO: check entity headers
        # TODO: check content-range
        ce = 'content-encoding'
        main_gzipped = 'gzip' in main.parsed_hdrs.get(ce, [])
        sub_gzipped = 'gzip' in sub.parsed_hdrs.get(ce, [])
        if main_gzipped != sub_gzipped:
            # exactly one of the two responses was gzipped
            self.set_message(
                'header-accept-ranges header-content-encoding',
                rs.RANGE_NEG_MISMATCH
            )
            return

        # The defaults differ (1 vs. 2) so that two absent ETags never
        # compare as equal.
        same_etag = (
            sub.parsed_hdrs.get('etag', 1) == main.parsed_hdrs.get('etag', 2)
        )
        if not same_etag:
            self.set_message('header-accept-ranges', rs.RANGE_CHANGED)
            return

        if sub.res_body == self.range_target:
            main.partial_support = True
            self.set_message('header-accept-ranges', rs.RANGE_CORRECT)
            return

        # the body samples are just bags of bits
        main.partial_support = False
        self.set_message(
            'header-accept-ranges',
            rs.RANGE_INCORRECT,
            range="bytes=%s-%s" % (self.range_start, self.range_end),
            range_expected=e(self.range_target.encode('string_escape')),
            range_expected_bytes=f_num(len(self.range_target)),
            range_received=e(sub.res_body.encode('string_escape')),
            range_received_bytes=f_num(sub.res_body_len)
        )
Exemple #8
0
    def done(self):
        """Judge the Range subrequest's response and record the outcome.

        A 206 response is checked for a gzip content-encoding mismatch
        against the base response and then, if the ETags are equal, for a
        body equal to self.range_target, which decides
        self.base.partial_support. A non-206 status yields rs.RANGE_FULL
        when it matches the base response's status and rs.RANGE_STATUS
        otherwise.
        """
        range_status = self.state.res_status
        if range_status == '206':
            # TODO: check entity headers
            # TODO: check content-range
            coding_hdr = 'content-encoding'
            base_has_gzip = (
                'gzip' in self.base.parsed_hdrs.get(coding_hdr, [])
            )
            range_has_gzip = (
                'gzip' in self.state.parsed_hdrs.get(coding_hdr, [])
            )
            if base_has_gzip != range_has_gzip:
                # only one of the two responses was gzipped
                self.set_message(
                    'header-accept-ranges header-content-encoding',
                    rs.RANGE_NEG_MISMATCH
                )
                return

            # distinct defaults keep two missing ETags from matching
            range_etag = self.state.parsed_hdrs.get('etag', 1)
            base_etag = self.base.parsed_hdrs.get('etag', 2)
            if range_etag == base_etag:
                if self.state.res_body == self.range_target:
                    self.base.partial_support = True
                    self.set_message('header-accept-ranges', rs.RANGE_CORRECT)
                else:
                    # the body samples are just bags of bits
                    self.base.partial_support = False
                    requested = "bytes=%s-%s" % (
                        self.range_start, self.range_end
                    )
                    self.set_message(
                        'header-accept-ranges',
                        rs.RANGE_INCORRECT,
                        range=requested,
                        range_expected=e(
                            self.range_target.encode('string_escape')
                        ),
                        range_expected_bytes=f_num(len(self.range_target)),
                        range_received=e(
                            self.state.res_body.encode('string_escape')
                        ),
                        range_received_bytes=f_num(self.state.res_body_len)
                    )
            else:
                self.set_message('header-accept-ranges', rs.RANGE_CHANGED)

        # TODO: address 416 directly
        elif range_status == self.base.res_status:
            self.base.partial_support = False
            self.set_message('header-accept-ranges', rs.RANGE_FULL)
        else:
            self.set_message(
                'header-accept-ranges',
                rs.RANGE_STATUS,
                range_status=range_status,
                enc_range_status=e(range_status or '(unknown)')
            )
Exemple #9
0
 def _response_body(self, chunk):
     "Process a chunk of the response body."
     state = self.state
     state.res_body_sample.append((state.res_body_len, chunk))
     if len(state.res_body_sample) > 4:
         state.res_body_sample.pop(0)
     self._md5_processor.update(chunk)
     state.res_body_len += len(chunk)
     if state.res_status == "206":
         # Store only partial responses completely, for error reporting
         state.res_body += chunk
         state.res_body_decode_len += len(chunk)
         # Don't actually try to make sense of a partial body...
         return
     content_codings = state.parsed_hdrs.get('content-encoding', [])
     content_codings.reverse()
     for coding in content_codings:
         # TODO: deflate support
         if coding == 'gzip' and self._gzip_ok:
             if not self._in_gzip_body:
                 self._gzip_header_buffer += chunk
                 try:
                     chunk = self._read_gzip_header(
                         self._gzip_header_buffer
                     )
                     self._in_gzip_body = True
                 except IndexError:
                     return # not a full header yet
                 except IOError, gzip_error:
                     state.set_message('header-content-encoding',
                                     rs.BAD_GZIP,
                                     gzip_error=str(gzip_error)
                     )
                     self._gzip_ok = False
                     return
             try:
                 chunk = self._gzip_processor.decompress(chunk)
             except zlib.error, zlib_error:
                 state.set_message(
                     'header-content-encoding', 
                     rs.BAD_ZLIB,
                     zlib_error=str(zlib_error),
                     ok_zlib_len=f_num(state.res_body_sample[-1][0]),
                     chunk_sample=chunk[:20].encode('string_escape')
                 )
                 self._gzip_ok = False
                 return
Exemple #10
0
 def _response_body(self, chunk):
     "Process a chunk of the response body."
     state = self.state
     state.res_body_sample.append((state.res_body_len, chunk))
     if len(state.res_body_sample) > 4:
         state.res_body_sample.pop(0)
     self._md5_processor.update(chunk)
     state.res_body_len += len(chunk)
     if state.res_status == "206":
         # Store only partial responses completely, for error reporting
         state.res_body += chunk
         state.res_body_decode_len += len(chunk)
         # Don't actually try to make sense of a partial body...
         return
     content_codings = state.parsed_hdrs.get('content-encoding', [])
     content_codings.reverse()
     for coding in content_codings:
         # TODO: deflate support
         if coding == 'gzip' and self._gzip_ok:
             if not self._in_gzip_body:
                 self._gzip_header_buffer += chunk
                 try:
                     chunk = self._read_gzip_header(
                         self._gzip_header_buffer)
                     self._in_gzip_body = True
                 except IndexError:
                     return  # not a full header yet
                 except IOError, gzip_error:
                     state.set_message('header-content-encoding',
                                       rs.BAD_GZIP,
                                       gzip_error=str(gzip_error))
                     self._gzip_ok = False
                     return
             try:
                 chunk = self._gzip_processor.decompress(chunk)
             except zlib.error, zlib_error:
                 state.set_message(
                     'header-content-encoding',
                     rs.BAD_ZLIB,
                     zlib_error=str(zlib_error),
                     ok_zlib_len=f_num(state.res_body_sample[-1][0]),
                     chunk_sample=chunk[:20].encode('string_escape'))
                 self._gzip_ok = False
                 return
Exemple #11
0
    def __init__(self,
                 uri,
                 method="GET",
                 req_hdrs=None,
                 req_body=None,
                 status_cb=None,
                 body_procs=None):
        """Set up the fetch for `uri` and do basic checks on the URI itself.

        The caller's request headers are passed to RedFetcher.__init__ with
        an added "Accept-Encoding: gzip" header; the headers as originally
        given (or an empty list) are kept in self.state.orig_req_hdrs. The
        remaining arguments are handed to RedFetcher.__init__ unchanged,
        with `method` also used as the request type.

        A message is recorded on self.state if the URI does not match
        `URI`, or if it is longer than `max_uri` once any fragment has been
        removed.
        """
        orig_req_hdrs = req_hdrs or []
        # Concatenation builds a new list, so the caller's list is not
        # modified by adding Accept-Encoding.
        rh = orig_req_hdrs + [(u'Accept-Encoding', u'gzip')]
        RedFetcher.__init__(self,
                            uri,
                            method,
                            rh,
                            req_body,
                            status_cb,
                            body_procs,
                            req_type=method)

        # Extra metadata that the "main" RED will be adorned with
        self.state.orig_req_hdrs = orig_req_hdrs
        self.state.age = None
        self.state.store_shared = None
        self.state.store_private = None
        self.state.freshness_lifetime = None
        self.state.stale_serveable = None
        self.state.partial_support = None
        self.state.inm_support = None
        self.state.ims_support = None
        self.state.gzip_support = None
        self.state.gzip_savings = 0

        # check the URI
        # Raw string: "\s" must reach the regex engine untouched rather
        # than rely on Python leaving an unknown escape alone.
        if not re.match(r"^\s*%s\s*$" % URI, uri, re.VERBOSE):
            self.state.set_message('uri', rs.URI_BAD_SYNTAX)
        # chop off the fragment; only the length check below sees the
        # shortened value
        uri = uri.partition('#')[0]
        if len(uri) > max_uri:
            self.state.set_message('uri',
                                   rs.URI_TOO_LONG,
                                   uri_len=f_num(len(uri)))
Exemple #12
0
    def done(self):
        """Compare the non-negotiated response with the negotiated one.

        self.state holds the response to the request made without
        negotiation and self.base the negotiated response. If the
        non-negotiated response is gzip-encoded anyway, that is reported
        and nothing else is checked. Otherwise status, content-type, Vary,
        body digest and ETag are compared in turn (a status or body
        mismatch ends the checks), and finally the compression savings are
        computed, stored on self.base and reported.
        """
        plain = self.state
        negotiated = self.base

        # see if it was compressed when not negotiated
        plain_vary = plain.parsed_hdrs.get('vary', [])
        plain_codings = plain.parsed_hdrs.get('content-encoding', [])
        if 'gzip' in plain_codings or 'x-gzip' in plain_codings:
            self.set_message('header-vary header-content-encoding',
                             rs.CONNEG_GZIP_WITHOUT_ASKING)
            return

        # Apparently, content negotiation is happening.

        # check status
        if negotiated.res_status != plain.res_status:
            self.set_message('status',
                             rs.VARY_STATUS_MISMATCH,
                             neg_status=negotiated.res_status,
                             noneg_status=plain.res_status)
            return  # Can't be sure what's going on...

        # check headers that should be invariant
        for hdr in ['content-type']:
            negotiated_value = negotiated.parsed_hdrs.get(hdr)
            plain_value = plain.parsed_hdrs.get(hdr, None)
            if negotiated_value != plain_value:
                self.set_message('header-%s' % hdr,
                                 rs.VARY_HEADER_MISMATCH,
                                 header=hdr)
                # TODO: expose on-the-wire values.

        # check Vary headers
        negotiated_vary = negotiated.parsed_hdrs.get('vary', [])
        if "accept-encoding" not in negotiated_vary \
           and "*" not in negotiated_vary:
            self.set_message('header-vary', rs.CONNEG_NO_VARY)
        if plain_vary != negotiated_vary:
            self.set_message('header-vary',
                             rs.VARY_INCONSISTENT,
                             conneg_vary=e(", ".join(negotiated_vary)),
                             no_conneg_vary=e(", ".join(plain_vary)))

        # check body
        if negotiated.res_body_post_md5 != plain.res_body_md5:
            self.set_message('body', rs.VARY_BODY_MISMATCH)
            return  # Can't be sure what's going on...

        # check ETag; the defaults differ so two absent ETags never match
        plain_etag = plain.parsed_hdrs.get('etag', 1)
        if plain_etag == negotiated.parsed_hdrs.get('etag', 2):
            self.set_message('header-etag', rs.VARY_ETAG_DOESNT_CHANGE)
            # TODO: weakness?

        # check compression efficiency
        plain_len = plain.res_body_len
        gzip_len = negotiated.res_body_len
        if plain_len > 0:
            savings = int(100 * ((float(plain_len) - gzip_len) / plain_len))
        else:
            savings = 0
        negotiated.gzip_support = True
        negotiated.gzip_savings = savings

        # Negative savings mean the negotiated body is the larger one; the
        # message then carries the magnitude only.
        if savings >= 0:
            verdict = rs.CONNEG_GZIP_GOOD
            reported = savings
        else:
            verdict = rs.CONNEG_GZIP_BAD
            reported = abs(savings)
        self.set_message('header-content-encoding',
                         verdict,
                         savings=reported,
                         orig_size=f_num(plain_len),
                         gzip_size=f_num(gzip_len))
Exemple #13
0
    def done(self):
        """Compare the non-negotiated response with the negotiated one.

        self.state holds the response to the request made without
        negotiation and self.base the negotiated response. An incomplete
        subrequest is reported on its own. If the non-negotiated response
        is gzip-encoded anyway, that is reported and nothing else is
        checked. Otherwise status, content-type, Vary, body digest and ETag
        are compared in turn (a status or body mismatch ends the checks),
        and finally the compression savings are computed, stored on
        self.base and reported.
        """
        plain = self.state
        negotiated = self.base

        if not plain.res_complete:
            self.set_message(
                '', rs.CONNEG_SUBREQ_PROBLEM, problem=plain.res_error.desc
            )
            return

        # see if it was compressed when not negotiated
        plain_vary = plain.parsed_hdrs.get('vary', [])
        plain_codings = plain.parsed_hdrs.get('content-encoding', [])
        if 'gzip' in plain_codings or 'x-gzip' in plain_codings:
            self.set_message(
                'header-vary header-content-encoding',
                rs.CONNEG_GZIP_WITHOUT_ASKING
            )
            return

        # Apparently, content negotiation is happening.

        # check status
        if negotiated.res_status != plain.res_status:
            self.set_message(
                'status',
                rs.VARY_STATUS_MISMATCH,
                neg_status=negotiated.res_status,
                noneg_status=plain.res_status
            )
            return  # Can't be sure what's going on...

        # check headers that should be invariant
        for hdr in ['content-type']:
            negotiated_value = negotiated.parsed_hdrs.get(hdr)
            plain_value = plain.parsed_hdrs.get(hdr, None)
            if negotiated_value != plain_value:
                self.set_message(
                    'header-%s' % hdr,
                    rs.VARY_HEADER_MISMATCH,
                    header=hdr
                )
                # TODO: expose on-the-wire values.

        # check Vary headers
        negotiated_vary = negotiated.parsed_hdrs.get('vary', [])
        if "accept-encoding" not in negotiated_vary \
           and "*" not in negotiated_vary:
            self.set_message('header-vary', rs.CONNEG_NO_VARY)
        if plain_vary != negotiated_vary:
            self.set_message(
                'header-vary',
                rs.VARY_INCONSISTENT,
                conneg_vary=", ".join(negotiated_vary),
                no_conneg_vary=", ".join(plain_vary)
            )

        # check body
        if negotiated.res_body_post_md5 != plain.res_body_md5:
            self.set_message('body', rs.VARY_BODY_MISMATCH)
            return  # Can't be sure what's going on...

        # check ETag; the defaults differ so two absent ETags never match
        plain_etag = plain.parsed_hdrs.get('etag', 1)
        if plain_etag == negotiated.parsed_hdrs.get('etag', 2):
            self.set_message('header-etag', rs.VARY_ETAG_DOESNT_CHANGE)
            # TODO: weakness?

        # check compression efficiency
        plain_len = plain.res_body_len
        gzip_len = negotiated.res_body_len
        if plain_len > 0:
            savings = int(100 * ((float(plain_len) - gzip_len) / plain_len))
        else:
            savings = 0
        negotiated.gzip_support = True
        negotiated.gzip_savings = savings

        # Negative savings mean the negotiated body is the larger one; the
        # message then carries the magnitude only.
        if savings >= 0:
            verdict = rs.CONNEG_GZIP_GOOD
            reported = savings
        else:
            verdict = rs.CONNEG_GZIP_BAD
            reported = abs(savings)
        self.set_message(
            'header-content-encoding',
            verdict,
            savings=reported,
            orig_size=f_num(plain_len),
            gzip_size=f_num(gzip_len)
        )
Exemple #14
0
def checkCaching(state):
    """Examine the HTTP caching characteristics of a response.

    Reads ``state.parsed_hdrs``, ``state.req_hdrs``, ``state.method``,
    ``state.res_ts`` and ``state.res_status`` and reports every finding
    through ``state.set_message``.

    Side effects on ``state``:
      * ``store_shared`` / ``store_private`` are always set.
      * ``age`` and ``freshness_lifetime`` are set unless the function
        bails out early.

    Returns ``None``.  It returns early (before age and freshness are
    evaluated) for an uncacheable request method, ``no-store``,
    ``no-cache`` and ``Vary: *``.
    """

    # TODO: check URI for query string, message about HTTP/1.0 if so

    # get header values
    lm = state.parsed_hdrs.get('last-modified', None)
    date = state.parsed_hdrs.get('date', None)
    cc_set = state.parsed_hdrs.get('cache-control', [])
    cc_list = [k for (k, v) in cc_set]  # keeps duplicates, for counting
    cc_dict = dict(cc_set)
    cc_keys = cc_dict.keys()

    # Last-Modified
    if lm:
        # fall back to the response timestamp when there is no Date header
        serv_date = date or state.res_ts
        if lm > serv_date:
            state.set_message('header-last-modified', rs.LM_FUTURE)
        else:
            state.set_message('header-last-modified',
                              rs.LM_PRESENT,
                              last_modified_string=rh.relative_time(
                                  lm, serv_date))

    # known Cache-Control directives that don't allow duplicates
    known_cc = [
        "max-age",
        "no-store",
        "s-maxage",
        "public",
        "private",
        "pre-check",
        "post-check",
        "stale-while-revalidate",
        "stale-if-error",
    ]

    # check for mis-capitalised directives /
    # assure there aren't any dup directives with different values
    for cc in cc_keys:
        cc_lower = cc.lower()
        if cc_lower in known_cc and cc != cc_lower:
            state.set_message('header-cache-control',
                              rs.CC_MISCAP,
                              cc_lower=cc_lower,
                              cc=cc)
        if cc in known_cc and cc_list.count(cc) > 1:
            state.set_message('header-cache-control', rs.CC_DUP, cc=cc)

    # Who can store this?
    if state.method not in cacheable_methods:
        state.store_shared = state.store_private = False
        state.set_message('method', rs.METHOD_UNCACHEABLE, method=state.method)
        return  # bail; nothing else to see here
    elif 'no-store' in cc_keys:
        state.store_shared = state.store_private = False
        state.set_message('header-cache-control', rs.NO_STORE)
        return  # bail; nothing else to see here
    elif 'private' in cc_keys:
        state.store_shared = False
        state.store_private = True
        state.set_message('header-cache-control', rs.PRIVATE_CC)
    elif 'authorization' in [k.lower() for k, v in state.req_hdrs] and \
      'public' not in cc_keys:
        state.store_shared = False
        state.store_private = True
        state.set_message('header-cache-control', rs.PRIVATE_AUTH)
    else:
        state.store_shared = state.store_private = True
        state.set_message('header-cache-control', rs.STOREABLE)

    # no-cache?
    if 'no-cache' in cc_keys:
        if "last-modified" not in state.parsed_hdrs and \
           "etag" not in state.parsed_hdrs:
            state.set_message('header-cache-control', rs.NO_CACHE_NO_VALIDATOR)
        else:
            state.set_message('header-cache-control', rs.NO_CACHE)
        return

    # pre-check / post-check
    if 'pre-check' in cc_keys or 'post-check' in cc_keys:
        if 'pre-check' not in cc_keys or 'post-check' not in cc_keys:
            state.set_message('header-cache-control', rs.CHECK_SINGLE)
        else:
            pre_check = post_check = None
            try:
                pre_check = int(cc_dict['pre-check'])
                post_check = int(cc_dict['post-check'])
            except (ValueError, TypeError):
                # int() raises TypeError (not ValueError) for a non-string
                # value such as None -- presumably what a valueless
                # directive parses to; TODO confirm against the parser.
                state.set_message('header-cache-control', rs.CHECK_NOT_INTEGER)
            if pre_check is not None and post_check is not None:
                if pre_check == 0 and post_check == 0:
                    state.set_message('header-cache-control',
                                      rs.CHECK_ALL_ZERO)
                elif post_check > pre_check:
                    state.set_message('header-cache-control',
                                      rs.CHECK_POST_BIGGER)
                elif post_check == 0:
                    state.set_message('header-cache-control',
                                      rs.CHECK_POST_ZERO)
                else:
                    state.set_message('header-cache-control',
                                      rs.CHECK_POST_PRE,
                                      pre_check=pre_check,
                                      post_check=post_check)

    # vary?
    vary = state.parsed_hdrs.get('vary', set())
    if "*" in vary:
        state.set_message('header-vary', rs.VARY_ASTERISK)
        return  # bail; nothing else to see here
    elif len(vary) > 3:
        state.set_message('header-vary',
                          rs.VARY_COMPLEX,
                          vary_count=f_num(len(vary)))
    else:
        if "user-agent" in vary:
            state.set_message('header-vary', rs.VARY_USER_AGENT)
        if "host" in vary:
            state.set_message('header-vary', rs.VARY_HOST)
        # TODO: enumerate the axes in a message

    # calculate age
    age_hdr = state.parsed_hdrs.get('age', 0)
    date_hdr = state.parsed_hdrs.get('date', 0)
    if date_hdr > 0:
        apparent_age = max(0, int(state.res_ts - date_hdr))
    else:
        apparent_age = 0
    current_age = max(apparent_age, age_hdr)
    current_age_str = relative_time(current_age, 0, 0)
    age_str = relative_time(age_hdr, 0, 0)
    state.age = age_hdr
    if age_hdr >= 1:
        state.set_message('header-age header-date',
                          rs.CURRENT_AGE,
                          age=age_str)

    # Check for clock skew and dateless origin server.
    skew = date_hdr - state.res_ts + age_hdr
    if not date_hdr:
        state.set_message('', rs.DATE_CLOCKLESS)
        # `in` instead of dict.has_key(), which is gone in Python 3
        if 'expires' in state.parsed_hdrs or \
          'last-modified' in state.parsed_hdrs:
            state.set_message('header-expires header-last-modified',
                              rs.DATE_CLOCKLESS_BAD_HDR)
    elif age_hdr > max_clock_skew and current_age - skew < max_clock_skew:
        state.set_message('header-date header-age', rs.AGE_PENALTY)
    elif abs(skew) > max_clock_skew:
        state.set_message('header-date',
                          rs.DATE_INCORRECT,
                          clock_skew_string=relative_time(skew, 0, 2))
    else:
        state.set_message('header-date', rs.DATE_CORRECT)

    # calculate freshness; precedence is s-maxage, then max-age, then Expires
    freshness_lifetime = 0
    has_explicit_freshness = False
    has_cc_freshness = False
    freshness_hdrs = ['header-date']
    if 's-maxage' in cc_keys:  # TODO: differentiate message for s-maxage
        freshness_lifetime = cc_dict['s-maxage']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif 'max-age' in cc_keys:
        freshness_lifetime = cc_dict['max-age']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif 'expires' in state.parsed_hdrs:
        has_explicit_freshness = True
        freshness_hdrs.append('header-expires')
        if 'date' in state.parsed_hdrs:
            freshness_lifetime = state.parsed_hdrs['expires'] - \
                state.parsed_hdrs['date']
        else:
            freshness_lifetime = state.parsed_hdrs['expires'] - \
                state.res_ts # ?

    freshness_left = freshness_lifetime - current_age
    freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
    freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)

    state.freshness_lifetime = freshness_lifetime
    fresh = freshness_left > 0
    if has_explicit_freshness:
        if fresh:
            state.set_message(" ".join(freshness_hdrs),
                              rs.FRESHNESS_FRESH,
                              freshness_lifetime=freshness_lifetime_str,
                              freshness_left=freshness_left_str,
                              current_age=current_age_str)
        elif has_cc_freshness and state.age > freshness_lifetime:
            # the Age header alone already exceeds the Cache-Control lifetime
            state.set_message(" ".join(freshness_hdrs),
                              rs.FRESHNESS_STALE_CACHE,
                              freshness_lifetime=freshness_lifetime_str,
                              freshness_left=freshness_left_str,
                              current_age=current_age_str)
        else:
            state.set_message(" ".join(freshness_hdrs),
                              rs.FRESHNESS_STALE_ALREADY,
                              freshness_lifetime=freshness_lifetime_str,
                              freshness_left=freshness_left_str,
                              current_age=current_age_str)

    # can heuristic freshness be used?
    elif state.res_status in heuristic_cacheable_status:
        state.set_message('header-last-modified', rs.FRESHNESS_HEURISTIC)
    else:
        state.set_message('', rs.FRESHNESS_NONE)

    # can stale responses be served?
    if 'must-revalidate' in cc_keys:
        if fresh:
            state.set_message('header-cache-control', rs.FRESH_MUST_REVALIDATE)
        elif has_explicit_freshness:
            state.set_message('header-cache-control', rs.STALE_MUST_REVALIDATE)
    elif 'proxy-revalidate' in cc_keys or 's-maxage' in cc_keys:
        if fresh:
            state.set_message('header-cache-control',
                              rs.FRESH_PROXY_REVALIDATE)
        elif has_explicit_freshness:
            state.set_message('header-cache-control',
                              rs.STALE_PROXY_REVALIDATE)
    else:
        if fresh:
            state.set_message('header-cache-control', rs.FRESH_SERVABLE)
        elif has_explicit_freshness:
            state.set_message('header-cache-control', rs.STALE_SERVABLE)

    # public?
    if 'public' in cc_keys:  # TODO: check for authentication in request
        state.set_message('header-cache-control', rs.PUBLIC)
Exemple #15
0
def checkCaching(state):
    """Examine the HTTP caching characteristics of a response.

    Reads ``state.parsed_hdrs``, ``state.req_hdrs``, ``state.method``,
    ``state.res_ts`` and ``state.res_status`` and reports every finding
    through ``state.set_message``.

    Side effects on ``state``:
      * ``store_shared`` / ``store_private`` are always set.
      * ``age`` and ``freshness_lifetime`` are set unless the function
        bails out early.

    Returns ``None``.  It returns early (before age and freshness are
    evaluated) for an uncacheable request method, ``no-store``,
    ``no-cache`` and ``Vary: *``.
    """

    # TODO: check URI for query string, message about HTTP/1.0 if so
    # known Cache-Control directives that don't allow duplicates
    known_cc = [
        "max-age",
        "no-store",
        "s-maxage",
        "public",
        "private",
        "pre-check",
        "post-check",
        "stale-while-revalidate",
        "stale-if-error",
    ]

    cc_set = state.parsed_hdrs.get("cache-control", [])
    cc_list = [k for (k, v) in cc_set]  # keeps duplicates, for counting
    cc_dict = dict(cc_set)
    cc_keys = cc_dict.keys()

    # check for mis-capitalised directives /
    # assure there aren't any dup directives with different values
    for cc in cc_keys:
        cc_lower = cc.lower()
        if cc_lower in known_cc and cc != cc_lower:
            state.set_message("header-cache-control", rs.CC_MISCAP, cc_lower=cc_lower, cc=cc)
        if cc in known_cc and cc_list.count(cc) > 1:
            state.set_message("header-cache-control", rs.CC_DUP, cc=cc)

    # Who can store this?
    if state.method not in cacheable_methods:
        state.store_shared = state.store_private = False
        state.set_message("method", rs.METHOD_UNCACHEABLE, method=state.method)
        return  # bail; nothing else to see here
    elif "no-store" in cc_keys:
        state.store_shared = state.store_private = False
        state.set_message("header-cache-control", rs.NO_STORE)
        return  # bail; nothing else to see here
    elif "private" in cc_keys:
        state.store_shared = False
        state.store_private = True
        state.set_message("header-cache-control", rs.PRIVATE_CC)
    elif "authorization" in [k.lower() for k, v in state.req_hdrs] and "public" not in cc_keys:
        state.store_shared = False
        state.store_private = True
        state.set_message("header-cache-control", rs.PRIVATE_AUTH)
    else:
        state.store_shared = state.store_private = True
        state.set_message("header-cache-control", rs.STOREABLE)

    # no-cache?
    if "no-cache" in cc_keys:
        if "last-modified" not in state.parsed_hdrs and "etag" not in state.parsed_hdrs:
            state.set_message("header-cache-control", rs.NO_CACHE_NO_VALIDATOR)
        else:
            state.set_message("header-cache-control", rs.NO_CACHE)
        return

    # pre-check / post-check
    if "pre-check" in cc_keys or "post-check" in cc_keys:
        # The directive is spelled "post-check" (hyphen).  Testing for
        # "post_check" never matched, so CHECK_SINGLE fired even when both
        # directives were present and the comparison below was unreachable.
        if "pre-check" not in cc_keys or "post-check" not in cc_keys:
            state.set_message("header-cache-control", rs.CHECK_SINGLE)
        else:
            pre_check = post_check = None
            try:
                pre_check = int(cc_dict["pre-check"])
                post_check = int(cc_dict["post-check"])
            except (ValueError, TypeError):
                # int() raises TypeError (not ValueError) for a non-string
                # value such as None -- presumably what a valueless
                # directive parses to; TODO confirm against the parser.
                state.set_message("header-cache-control", rs.CHECK_NOT_INTEGER)
            if pre_check is not None and post_check is not None:
                if pre_check == 0 and post_check == 0:
                    state.set_message("header-cache-control", rs.CHECK_ALL_ZERO)
                elif post_check > pre_check:
                    state.set_message("header-cache-control", rs.CHECK_POST_BIGGER)
                elif post_check == 0:
                    state.set_message("header-cache-control", rs.CHECK_POST_ZERO)
                else:
                    state.set_message(
                        "header-cache-control", rs.CHECK_POST_PRE, pre_check=pre_check, post_check=post_check
                    )

    # vary?
    vary = state.parsed_hdrs.get("vary", set())
    if "*" in vary:
        state.set_message("header-vary", rs.VARY_ASTERISK)
        return  # bail; nothing else to see here
    elif len(vary) > 3:
        state.set_message("header-vary", rs.VARY_COMPLEX, vary_count=f_num(len(vary)))
    else:
        if "user-agent" in vary:
            state.set_message("header-vary", rs.VARY_USER_AGENT)
        if "host" in vary:
            state.set_message("header-vary", rs.VARY_HOST)
        # TODO: enumerate the axes in a message

    # calculate age
    age_hdr = state.parsed_hdrs.get("age", 0)
    date_hdr = state.parsed_hdrs.get("date", 0)
    if date_hdr > 0:
        apparent_age = max(0, int(state.res_ts - date_hdr))
    else:
        apparent_age = 0
    current_age = max(apparent_age, age_hdr)
    current_age_str = relative_time(current_age, 0, 0)
    age_str = relative_time(age_hdr, 0, 0)
    state.age = age_hdr
    if age_hdr >= 1:
        state.set_message("header-age header-date", rs.CURRENT_AGE, age=age_str)

    # Check for clock skew and dateless origin server.
    skew = date_hdr - state.res_ts + age_hdr
    if not date_hdr:
        state.set_message("", rs.DATE_CLOCKLESS)
        # `in` instead of dict.has_key(), which is gone in Python 3
        if "expires" in state.parsed_hdrs or "last-modified" in state.parsed_hdrs:
            state.set_message("header-expires header-last-modified", rs.DATE_CLOCKLESS_BAD_HDR)
    elif age_hdr > max_clock_skew and current_age - skew < max_clock_skew:
        state.set_message("header-date header-age", rs.AGE_PENALTY)
    elif abs(skew) > max_clock_skew:
        state.set_message("header-date", rs.DATE_INCORRECT, clock_skew_string=relative_time(skew, 0, 2))
    else:
        state.set_message("header-date", rs.DATE_CORRECT)

    # calculate freshness; precedence is s-maxage, then max-age, then Expires
    freshness_lifetime = 0
    has_explicit_freshness = False
    has_cc_freshness = False
    freshness_hdrs = ["header-date"]
    if "s-maxage" in cc_keys:  # TODO: differentiate message for s-maxage
        freshness_lifetime = cc_dict["s-maxage"]
        freshness_hdrs.append("header-cache-control")
        has_explicit_freshness = True
        has_cc_freshness = True
    elif "max-age" in cc_keys:
        freshness_lifetime = cc_dict["max-age"]
        freshness_hdrs.append("header-cache-control")
        has_explicit_freshness = True
        has_cc_freshness = True
    elif "expires" in state.parsed_hdrs:
        has_explicit_freshness = True
        freshness_hdrs.append("header-expires")
        if "date" in state.parsed_hdrs:
            freshness_lifetime = state.parsed_hdrs["expires"] - state.parsed_hdrs["date"]
        else:
            freshness_lifetime = state.parsed_hdrs["expires"] - state.res_ts  # ?

    freshness_left = freshness_lifetime - current_age
    freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
    freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)

    state.freshness_lifetime = freshness_lifetime
    fresh = freshness_left > 0
    if has_explicit_freshness:
        if fresh:
            state.set_message(
                " ".join(freshness_hdrs),
                rs.FRESHNESS_FRESH,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )
        elif has_cc_freshness and state.age > freshness_lifetime:
            # the Age header alone already exceeds the Cache-Control lifetime
            state.set_message(
                " ".join(freshness_hdrs),
                rs.FRESHNESS_STALE_CACHE,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )
        else:
            state.set_message(
                " ".join(freshness_hdrs),
                rs.FRESHNESS_STALE_ALREADY,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )

    # can heuristic freshness be used?
    elif state.res_status in heuristic_cacheable_status:
        state.set_message("header-last-modified", rs.FRESHNESS_HEURISTIC)
    else:
        state.set_message("", rs.FRESHNESS_NONE)

    # can stale responses be served?
    if "must-revalidate" in cc_keys:
        if fresh:
            state.set_message("header-cache-control", rs.FRESH_MUST_REVALIDATE)
        elif has_explicit_freshness:
            state.set_message("header-cache-control", rs.STALE_MUST_REVALIDATE)
    elif "proxy-revalidate" in cc_keys or "s-maxage" in cc_keys:
        if fresh:
            state.set_message("header-cache-control", rs.FRESH_PROXY_REVALIDATE)
        elif has_explicit_freshness:
            state.set_message("header-cache-control", rs.STALE_PROXY_REVALIDATE)
    else:
        if fresh:
            state.set_message("header-cache-control", rs.FRESH_SERVABLE)
        elif has_explicit_freshness:
            state.set_message("header-cache-control", rs.STALE_SERVABLE)

    # public?
    if "public" in cc_keys:  # TODO: check for authentication in request
        state.set_message("header-cache-control", rs.PUBLIC)