Example #1
    def done(self) -> None:
        if not self.response.complete:
            if self.response.http_error:
                problem = self.response.http_error.desc
            else:
                problem = ""
            self.add_base_note("", RANGE_SUBREQ_PROBLEM, problem=problem)
            return

        if self.response.status_code == "206":
            c_e = "content-encoding"
            if ("gzip" in self.base.response.parsed_headers.get(c_e, []) ==
                    "gzip" not in self.response.parsed_headers.get(c_e, [])):
                self.add_base_note(
                    "header-accept-ranges header-content-encoding",
                    RANGE_NEG_MISMATCH)
                return
            self.check_missing_hdrs(
                [
                    "date",
                    "cache-control",
                    "content-location",
                    "etag",
                    "expires",
                    "vary",
                ],
                MISSING_HDRS_206,
            )
            if self.response.parsed_headers.get(
                    "etag", None) == self.base.response.parsed_headers.get(
                        "etag", None):
                if self.response.payload == self.range_target:
                    self.base.partial_support = True
                    self.add_base_note("header-accept-ranges", RANGE_CORRECT)
                else:
                    # the body samples are just bags of bits
                    self.base.partial_support = False
                    self.add_base_note(
                        "header-accept-ranges",
                        RANGE_INCORRECT,
                        range="bytes=%s-%s" %
                        (self.range_start, self.range_end),
                        range_expected=display_bytes(self.range_target),
                        range_expected_bytes=f_num(len(self.range_target)),
                        range_received=display_bytes(self.response.payload),
                        range_received_bytes=f_num(self.response.payload_len),
                    )
            else:
                self.add_base_note("header-accept-ranges", RANGE_CHANGED)

        elif self.response.status_code == self.base.response.status_code:
            self.base.partial_support = False
            self.add_base_note("header-accept-ranges", RANGE_FULL)
        else:
            self.add_base_note(
                "header-accept-ranges",
                RANGE_STATUS,
                range_status=self.response.status_code,
                enc_range_status=self.response.status_code or "(unknown)",
            )
Example #2
    def done(self):
        if not self.response.complete:
            self.add_note('', rs.RANGE_SUBREQ_PROBLEM,
                problem=self.response.http_error.desc
            )
            return
            
        if self.response.status_code == '206':
            c_e = 'content-encoding'
            if 'gzip' in self.base.response.parsed_headers.get(c_e, []) == \
               'gzip' not in self.response.parsed_headers.get(c_e, []):
                self.add_note(
                    'header-accept-ranges header-content-encoding',
                    rs.RANGE_NEG_MISMATCH
                )
                return
            if not [True for h in self.base.orig_req_hdrs 
                if h[0].lower() == 'if-range']:
                self.check_missing_hdrs([
                        'date', 'cache-control', 'content-location', 'etag', 
                        'expires', 'vary'
                    ], rs.MISSING_HDRS_206, 'Range'
                )
            if self.response.parsed_headers.get('etag', 1) == \
              self.base.response.parsed_headers.get('etag', 2):
                if self.response.payload == self.range_target:
                    self.base.partial_support = True
                    self.add_note('header-accept-ranges', rs.RANGE_CORRECT)
                else:
                    # the body samples are just bags of bits
                    self.base.partial_support = False
                    self.add_note('header-accept-ranges',
                        rs.RANGE_INCORRECT,
                        range="bytes=%s-%s" % (
                            self.range_start, self.range_end
                        ),
                        range_expected = \
                          self.range_target.encode('string_escape'),
                        range_expected_bytes = f_num(len(self.range_target)),
                        range_received = \
                          self.response.payload.encode('string_escape'),
                        range_received_bytes = \
                          f_num(self.response.payload_len)
                    )
            else:
                self.add_note('header-accept-ranges', rs.RANGE_CHANGED)

        # TODO: address 416 directly
        elif self.response.status_code == \
          self.base.response.status_code:
            self.base.partial_support = False
            self.add_note('header-accept-ranges', rs.RANGE_FULL)
        else:
            self.add_note('header-accept-ranges', 
                rs.RANGE_STATUS,
                range_status=self.response.status_code,
                enc_range_status=self.response.status_code or \
                  '(unknown)'
            )
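The range checks above lean on REDbot internals, but the core idea can be reproduced with the standard library alone. A minimal sketch, assuming a stable resource at a placeholder URL: fetch the full body, fetch a byte range, and confirm the server returns 206 with exactly the expected slice.

    # Minimal sketch (not REDbot code): check whether a server honours byte
    # ranges. The URL and range below are placeholders.
    import urllib.request

    url = "https://example.com/resource"
    start, end = 0, 99

    full_body = urllib.request.urlopen(url).read()

    req = urllib.request.Request(url, headers={"Range": "bytes=%s-%s" % (start, end)})
    with urllib.request.urlopen(req) as resp:
        partial_support = (resp.status == 206 and
                           resp.read() == full_body[start:end + 1])
    print("partial content supported:", partial_support)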
Example #3
    def format_options(self, resource: HttpResource) -> str:
        "Return things that the user can do with the URI as HTML links"
        options = []
        media_type = resource.response.parsed_headers.get(
            "content-type", [""])[0]
        options.append((
            "response headers: %s bytes" %
            f_num(resource.response.header_length),
            "how large the response headers are, including the status line",
        ))
        options.append((
            "body: %s bytes" % f_num(resource.response.payload_len),
            "how large the response body is",
        ))
        transfer_overhead = (resource.response.transfer_length -
                             resource.response.payload_len)
        if transfer_overhead > 0:
            options.append((
                "transfer overhead: %s bytes" % f_num(transfer_overhead),
                "how much using chunked encoding adds to the response size",
            ))
        options.append(None)
        options.append((
            """\
<script type="text/javascript">
   document.write("<a href='#' id='body_view' accesskey='b'>view body</a>")
</script>""",
            "View this response body (with any gzip compression removed)",
        ))
        if isinstance(resource, HttpResource):
            options.append((
                """\
        <a href="?%s" accesskey="h">view har</a>""" %
                self.req_qs(res_format="har"),
                "View a HAR (HTTP ARchive, a JSON format) file for this test",
            ))
        if not self.kw.get("is_saved", False):
            if self.kw.get("allow_save", False):
                options.append((
                    "<a href=\"#\" id='save' accesskey='s'>save</a>",
                    "Save these results for future reference",
                ))
            if media_type in self.validators:
                options.append((
                    "<a href=\"%s\" accesskey='v'>validate body</a>" %
                    self.validators[media_type] %
                    e_query_arg(resource.request.uri),
                    "",
                ))
            if hasattr(resource, "link_count") and resource.link_count > 0:
                options.append((
                    "<a href=\"?descend=True&%s\" accesskey='a'>"
                    "check embedded</a>" % self.req_qs(use_stored=False),
                    "run REDbot on images, frames and embedded links",
                ))
        return nl.join([
            o and "<span class='option' title='%s'>%s</span>" % (o[1], o[0])
            or "<br>" for o in options
        ])
Example #4
    def format_options(self, state):
        "Return things that the user can do with the URI as HTML links"
        options = []
        media_type = state.response.parsed_headers.get('content-type', [""])[0]
        options.append(
            (u"response headers: %s bytes" % \
             f_num(state.response.header_length), 
             u"how large the response headers are, including the status line"
            )
        )
        options.append((u"body: %s bytes" % f_num(state.response.payload_len),
            u"how large the response body is"))
        transfer_overhead = state.response.transfer_length - \
            state.response.payload_len
        if transfer_overhead > 0:
            options.append(
                (
                 u"transfer overhead: %s bytes" % f_num(transfer_overhead),
                 u"how much using chunked encoding adds to the response size"
                )
            )
        options.append(None)
        options.append((u"""\
<script type="text/javascript">
   document.write("<a href='#' id='body_view' accesskey='b'>view body</a>")
</script>""", 
    "View this response body (with any gzip compression removed)"
        ))
        options.append(
            (u"""\
    <a href='?%s' accesskey='h'>view har</a>""" % self.req_qs(res_format='har'), 
            "View a HAR (HTTP ARchive, a JSON format) file for this response"
        ))
        if not self.kw.get('is_saved', False):
            if self.kw.get('allow_save', False):
                options.append((
                    u"<a href='#' id='save' accesskey='s'>save</a>", 
                    "Save these results for future reference"
                ))
            if self.validators.has_key(media_type):
                options.append(
                    (
                    u"<a href='%s' accesskey='v'>validate body</a>" %
                        self.validators[media_type] % 
                        e_query_arg(state.request.uri), 
                     ""
                    )
                )
            if hasattr(state, "link_count") and state.link_count > 0:
                options.append((
                    u"<a href='?descend=True&%s' accesskey='a'>" \
                    u"check embedded</a>" % self.req_qs(use_stored=False), 
                    "run RED on images, frames and embedded links"
                ))
        return nl.join(
            [o and u"<span class='option' title='%s'>%s</span>" % (o[1], o[0])
             or u"<br>" for o in options]
        )
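The final join in both format_options variants turns a list of (label, title) pairs, with None acting as a separator, into HTML spans. A standalone sketch of that idiom, with html.escape added for the title attribute (the helper name is illustrative, not REDbot's):

    import html

    def render_options(options):
        # options: list of (label_html, title) tuples, or None for a line break.
        rendered = []
        for option in options:
            if option is None:
                rendered.append("<br>")
            else:
                label, title = option
                rendered.append("<span class='option' title='%s'>%s</span>"
                                % (html.escape(title, quote=True), label))
        return "\n".join(rendered)

    print(render_options([("body: 1,234 bytes", "how large the response body is"), None]))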
Example #5
    def done(self) -> None:
        if not self.response.complete:
            if self.response.http_error:
                problem = self.response.http_error.desc
            else:
                problem = ""
            self.add_base_note('', RANGE_SUBREQ_PROBLEM, problem=problem)
            return

        if self.response.status_code == '206':
            c_e = 'content-encoding'
            if 'gzip' in self.base.response.parsed_headers.get(c_e, []) == \
               'gzip' not in self.response.parsed_headers.get(c_e, []):
                self.add_base_note(
                    'header-accept-ranges header-content-encoding',
                    RANGE_NEG_MISMATCH)
                return
            self.check_missing_hdrs([
                'date', 'cache-control', 'content-location', 'etag', 'expires',
                'vary'
            ], MISSING_HDRS_206)
            if self.response.parsed_headers.get('etag', None) == \
              self.base.response.parsed_headers.get('etag', None):
                if self.response.payload == self.range_target:
                    self.base.partial_support = True
                    self.add_base_note('header-accept-ranges', RANGE_CORRECT)
                else:
                    # the body samples are just bags of bits
                    self.base.partial_support = False
                    self.add_base_note(
                        'header-accept-ranges',
                        RANGE_INCORRECT,
                        range="bytes=%s-%s" %
                        (self.range_start, self.range_end),
                        range_expected=display_bytes(self.range_target),
                        range_expected_bytes=f_num(len(self.range_target)),
                        range_received=display_bytes(self.response.payload),
                        range_received_bytes=f_num(self.response.payload_len))
            else:
                self.add_base_note('header-accept-ranges', RANGE_CHANGED)

        # TODO: address 416 directly
        elif self.response.status_code == self.base.response.status_code:
            self.base.partial_support = False
            self.add_base_note('header-accept-ranges', RANGE_FULL)
        else:
            self.add_base_note('header-accept-ranges',
                               RANGE_STATUS,
                               range_status=self.response.status_code,
                               enc_range_status=self.response.status_code
                               or '(unknown)')
Example #6
    def body_done(self, complete, trailers=None):
        """
        Signal that the body is done. Complete should be True if we 
        know it's complete.
        """
        # TODO: check trailers
        self.complete = complete
        self.trailers = trailers or []
        self.payload_md5 = self._md5_processor.digest()
        self.decoded_md5 = self._md5_post_processor.digest()

        if self.is_request or \
          (not self.is_head_response and self.status_code not in ['304']):
            # check payload basics
            if self.parsed_headers.has_key('content-length'):
                if self.payload_len == self.parsed_headers['content-length']:
                    self.add_note('header-content-length', rs.CL_CORRECT)
                else:
                    self.add_note('header-content-length', 
                                    rs.CL_INCORRECT,
                                    body_length=f_num(self.payload_len)
                    )
            if self.parsed_headers.has_key('content-md5'):
                c_md5_calc = base64.encodestring(self.payload_md5)[:-1]
                if self.parsed_headers['content-md5'] == c_md5_calc:
                    self.add_note('header-content-md5', rs.CMD5_CORRECT)
                else:
                    self.add_note('header-content-md5', 
                                  rs.CMD5_INCORRECT, calc_md5=c_md5_calc)
Example #7
    def body_done(self, complete: bool, trailers: RawHeaderListType = None) -> None:
        """
        Signal that the body is done. Complete should be True if we
        know it's complete (e.g., final chunk, Content-Length).
        """
        self.complete = complete
        self.complete_time = thor.time()
        self.trailers = trailers or []
        self.payload_md5 = self._md5_processor.digest()
        self.decoded_md5 = self._md5_post_processor.digest()

        if self.is_request or \
          (not self.is_head_response and self.status_code not in ['304']):
            # check payload basics
            if 'content-length' in self.parsed_headers:
                if self.payload_len == self.parsed_headers['content-length']:
                    self.add_note('header-content-length', CL_CORRECT)
                else:
                    self.add_note('header-content-length',
                                  CL_INCORRECT,
                                  body_length=f_num(self.payload_len))
            if 'content-md5' in self.parsed_headers:
                c_md5_calc = base64.encodebytes(self.payload_md5)[:-1]
                if self.parsed_headers['content-md5'] == c_md5_calc:
                    self.add_note('header-content-md5', CMD5_CORRECT)
                else:
                    self.add_note('header-content-md5',
                                  CMD5_INCORRECT, calc_md5=c_md5_calc)
        self.emit('content_available')
Example #8
    def body_done(self,
                  complete: bool,
                  trailers: RawHeaderListType = None) -> None:
        """
        Signal that the body is done. Complete should be True if we
        know it's complete (e.g., final chunk, Content-Length).
        """
        self.complete = complete
        self.complete_time = thor.time()
        self.trailers = trailers or []
        self.payload_md5 = self._md5_processor.digest()
        self.decoded_md5 = self._md5_post_processor.digest()

        if self.is_request or \
          (not self.is_head_response and self.status_code not in ['304']):
            # check payload basics
            if 'content-length' in self.parsed_headers:
                if self.payload_len == self.parsed_headers['content-length']:
                    self.add_note('header-content-length', CL_CORRECT)
                else:
                    self.add_note('header-content-length',
                                  CL_INCORRECT,
                                  body_length=f_num(self.payload_len))
            if 'content-md5' in self.parsed_headers:
                c_md5_calc = base64.encodebytes(self.payload_md5)[:-1]
                if self.parsed_headers['content-md5'] == c_md5_calc:
                    self.add_note('header-content-md5', CMD5_CORRECT)
                else:
                    self.add_note('header-content-md5',
                                  CMD5_INCORRECT,
                                  calc_md5=c_md5_calc)
        self.emit('content_available')
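The payload checks in body_done come down to comparing the declared Content-Length with the received byte count, and the declared Content-MD5 with a base64-encoded MD5 digest of the body. A minimal standalone sketch (function and variable names are illustrative):

    import base64
    import hashlib

    def check_payload(payload: bytes, content_length: int, content_md5: bytes) -> dict:
        # Recompute what the headers claim and compare.
        calc_md5 = base64.encodebytes(hashlib.md5(payload).digest())[:-1]
        return {
            "content-length-ok": len(payload) == content_length,
            "content-md5-ok": calc_md5 == content_md5,
        }

    body = b"hello world"
    declared_md5 = base64.encodebytes(hashlib.md5(body).digest())[:-1]
    print(check_payload(body, len(body), declared_md5))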
Example #9
    def process(self, headers: RawHeaderListType) -> Tuple[StrHeaderListType, HeaderDictType]:
        """
        Given a list of (bytes name, bytes value) headers and:
         - calculate the total header block size
         - call msg.add_note as appropriate
        Returns:
         - a list of unicode header tuples
         - a dict of parsed header values
        """
        unicode_headers = []   # unicode version of the header tuples
        parsed_headers = {}    # dictionary of parsed header values
        offset = 0             # what number header we're on

        # estimate the start-lines size
        header_block_size = len(self.message.version)
        if self.message.is_request:
            header_block_size += len(self.message.method) + len(self.message.uri) + 2
        else:
            header_block_size += len(self.message.status_phrase) + 5

        for name, value in headers:
            offset += 1
            add_note = partial(self.message.add_note, "offset-%s" % offset)

            # track header size
            header_size = len(name) + len(value)
            header_block_size += header_size

            # decode the header to make it unicode clean
            try:
                str_name = name.decode('ascii', 'strict')
            except UnicodeError:
                str_name = name.decode('ascii', 'ignore')
                add_note(HEADER_NAME_ENCODING, field_name=str_name)
            try:
                str_value = value.decode('ascii', 'strict')
            except UnicodeError:
                str_value = value.decode('iso-8859-1', 'replace')
                add_note(HEADER_VALUE_ENCODING, field_name=str_name)
            unicode_headers.append((str_name, str_value))

            header_handler = self.get_header_handler(str_name)
            field_add_note = partial(add_note, # type: ignore
                                     field_name=header_handler.canonical_name)
            header_handler.handle_input(str_value, field_add_note)

            if header_size > MAX_HDR_SIZE:
                add_note(HEADER_TOO_LARGE, field_name=header_handler.canonical_name,
                         header_size=f_num(header_size))

        # check each of the complete header values and get the parsed value
        for header_name, header_handler in list(self._header_handlers.items()):
            header_add_note = partial(self.message.add_note,
                                      "header-%s" % header_handler.canonical_name.lower(),
                                      field_name=header_handler.canonical_name)
            header_handler.finish(self.message, header_add_note) # type: ignore
            parsed_headers[header_handler.norm_name] = header_handler.value

        return unicode_headers, parsed_headers
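The per-header decoding above (strict ASCII first, then a lossy fallback) is easy to exercise on its own. A minimal sketch over raw (bytes, bytes) header tuples:

    def decode_headers(raw_headers):
        # raw_headers: list of (bytes, bytes) tuples as read off the wire.
        decoded = []
        for name, value in raw_headers:
            try:
                str_name = name.decode("ascii", "strict")
            except UnicodeError:
                str_name = name.decode("ascii", "ignore")          # drop bad name bytes
            try:
                str_value = value.decode("ascii", "strict")
            except UnicodeError:
                str_value = value.decode("iso-8859-1", "replace")  # Latin-1 fallback
            decoded.append((str_name, str_value))
        return decoded

    print(decode_headers([(b"Content-Type", b"text/plain"), (b"X-Note", b"caf\xe9")]))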
Example #10
    def done(self) -> None:
        if not self.response.complete:
            if self.response.http_error:
                problem = self.response.http_error.desc
            else:
                problem = ""
            self.add_base_note('', RANGE_SUBREQ_PROBLEM, problem=problem)
            return

        if self.response.status_code == '206':
            c_e = 'content-encoding'
            if 'gzip' in self.base.response.parsed_headers.get(c_e, []) == \
               'gzip' not in self.response.parsed_headers.get(c_e, []):
                self.add_base_note('header-accept-ranges header-content-encoding',
                                   RANGE_NEG_MISMATCH)
                return
            self.check_missing_hdrs(['date', 'cache-control', 'content-location', 'etag',
                                     'expires', 'vary'], MISSING_HDRS_206)
            if self.response.parsed_headers.get('etag', None) == \
              self.base.response.parsed_headers.get('etag', None):
                if self.response.payload == self.range_target:
                    self.base.partial_support = True
                    self.add_base_note('header-accept-ranges', RANGE_CORRECT)
                else:
                    # the body samples are just bags of bits
                    self.base.partial_support = False
                    self.add_base_note('header-accept-ranges', RANGE_INCORRECT,
                                       range="bytes=%s-%s" % (self.range_start, self.range_end),
                                       range_expected=display_bytes(self.range_target),
                                       range_expected_bytes=f_num(len(self.range_target)),
                                       range_received=display_bytes(self.response.payload),
                                       range_received_bytes=f_num(self.response.payload_len))
            else:
                self.add_base_note('header-accept-ranges', RANGE_CHANGED)

        # TODO: address 416 directly
        elif self.response.status_code == self.base.response.status_code:
            self.base.partial_support = False
            self.add_base_note('header-accept-ranges', RANGE_FULL)
        else:
            self.add_base_note('header-accept-ranges', RANGE_STATUS,
                               range_status=self.response.status_code,
                               enc_range_status=self.response.status_code or '(unknown)')
Example #11
 def set_iri(self, iri: str) -> None:
     """
     Given a unicode string (possibly an IRI), convert to a URI and make sure it's sensible.
     """
     self.iri = iri
     try:
         self.uri = self.iri_to_uri(iri)
     except (ValueError, UnicodeError) as why:
         raise thor.http.error.UrlError(why.args[0])
     if not re.match(r"^\s*%s\s*$" % rfc3986.URI, self.uri, re.VERBOSE):
         self.add_note('uri', URI_BAD_SYNTAX)
     if '#' in self.uri:
         # chop off the fragment
         self.uri = self.uri[:self.uri.index('#')]
     if len(self.uri) > MAX_URI:
         self.add_note('uri', URI_TOO_LONG, uri_len=f_num(len(self.uri)))
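iri_to_uri is a REDbot helper; a rough standard-library approximation of the same idea (IDNA-encode the host, percent-encode path and query, drop the fragment) might look like this. It is a sketch, not REDbot's implementation, and it ignores userinfo and some edge cases:

    from urllib.parse import quote, urlsplit, urlunsplit

    def iri_to_uri_sketch(iri: str) -> str:
        parts = urlsplit(iri)
        host = parts.hostname.encode("idna").decode("ascii") if parts.hostname else ""
        netloc = host + (":%d" % parts.port if parts.port else "")
        return urlunsplit((
            parts.scheme,
            netloc,
            quote(parts.path, safe="/%"),
            quote(parts.query, safe="=&%"),
            "",   # fragment chopped off, as set_iri does
        ))

    print(iri_to_uri_sketch("https://exämple.com/päth?q=wert#frag"))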
Example #12
    def _process_content_codings(self, chunk: bytes) -> bytes:
        """
        Decode a chunk according to the message's content-encoding header.

        Currently supports gzip.
        """
        content_codings = self.parsed_headers.get("content-encoding", [])
        content_codings.reverse()
        for coding in content_codings:
            if coding in ["gzip", "x-gzip"] and self._decode_ok:
                if not self._in_gzip_body:
                    self._gzip_header_buffer += chunk
                    try:
                        chunk = self._read_gzip_header(
                            self._gzip_header_buffer)
                        self._in_gzip_body = True
                    except IndexError:
                        return b""  # not a full header yet
                    except IOError as gzip_error:
                        self.add_note(
                            "header-content-encoding",
                            BAD_GZIP,
                            gzip_error=str(gzip_error),
                        )
                        self._decode_ok = False
                        return b""
                try:
                    chunk = self._gzip_processor.decompress(chunk)
                except zlib.error as zlib_error:
                    self.add_note(
                        "header-content-encoding",
                        BAD_ZLIB,
                        zlib_error=str(zlib_error),
                        ok_zlib_len=f_num(self.payload_len),
                        chunk_sample=display_bytes(chunk),
                    )
                    self._decode_ok = False
                    return b""
            else:
                # we can't handle other codecs, so punt on body processing.
                self._decode_ok = False
                return b""
        self._md5_post_processor.update(chunk)
        self.decoded_len += len(chunk)
        return chunk
Example #13
    def _process_content_codings(self, chunk: bytes) -> bytes:
        """
        Decode a chunk according to the message's content-encoding header.

        Currently supports gzip.
        """
        content_codings = self.parsed_headers.get('content-encoding', [])
        content_codings.reverse()
        for coding in content_codings:
            if coding in ['gzip', 'x-gzip'] and self._decode_ok:
                if not self._in_gzip_body:
                    self._gzip_header_buffer += chunk
                    try:
                        chunk = self._read_gzip_header(self._gzip_header_buffer)
                        self._in_gzip_body = True
                    except IndexError:
                        return b'' # not a full header yet
                    except IOError as gzip_error:
                        self.add_note('header-content-encoding',
                                      BAD_GZIP,
                                      gzip_error=str(gzip_error))
                        self._decode_ok = False
                        return b''
                try:
                    chunk = self._gzip_processor.decompress(chunk)
                except zlib.error as zlib_error:
                    self.add_note(
                        'header-content-encoding',
                        BAD_ZLIB,
                        zlib_error=str(zlib_error),
                        ok_zlib_len=f_num(self.payload_len),
                        chunk_sample=display_bytes(chunk)
                    )
                    self._decode_ok = False
                    return b''
            else:
                # we can't handle other codecs, so punt on body processing.
                self._decode_ok = False
                return b''
        self._md5_post_processor.update(chunk)
        self.decoded_len += len(chunk)
        return chunk
Example #14
 def _process_content_codings(self, chunk):
     """
     Decode a chunk according to the message's content-encoding header.
     
     Currently supports gzip.
     """
     content_codings = self.parsed_headers.get('content-encoding', [])
     content_codings.reverse()
     for coding in content_codings:
         # TODO: deflate support
         if coding in ['gzip', 'x-gzip'] and self._decode_ok:
             if not self._in_gzip_body:
                 self._gzip_header_buffer += chunk
                 try:
                     chunk = self._read_gzip_header(
                         self._gzip_header_buffer
                     )
                     self._in_gzip_body = True
                 except IndexError:
                     return '' # not a full header yet
                 except IOError, gzip_error:
                     self.add_note('header-content-encoding',
                                     rs.BAD_GZIP,
                                     gzip_error=str(gzip_error)
                     )
                     self._decode_ok = False
                     return
             try:
                 chunk = self._gzip_processor.decompress(chunk)
             except zlib.error, zlib_error:
                 self.add_note(
                     'header-content-encoding', 
                     rs.BAD_ZLIB,
                     zlib_error=str(zlib_error),
                     ok_zlib_len=f_num(self.payload_sample[-1][0]),
                     chunk_sample=chunk[:20].encode('string_escape')
                 )
                 self._decode_ok = False
                 return
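The gzip handling in the variants above buffers the gzip header itself and then feeds chunks to a zlib decompressor. Outside REDbot, the header parsing can be delegated to zlib by passing wbits=16+MAX_WBITS; a minimal sketch:

    import gzip
    import zlib

    def decode_gzip_stream(chunks):
        # Incrementally decompress gzip-encoded body chunks; 16 + MAX_WBITS
        # tells zlib to expect (and skip) the gzip wrapper itself.
        decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
        decoded = b""
        for chunk in chunks:
            try:
                decoded += decompressor.decompress(chunk)
            except zlib.error as zlib_error:
                raise ValueError("bad gzip body: %s" % zlib_error)
        return decoded

    compressed = gzip.compress(b"hello " * 10)
    # feed it in two arbitrary pieces to simulate chunked delivery
    print(decode_gzip_stream([compressed[:5], compressed[5:]]))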
Example #15
    def done(self) -> None:
        negotiated = self.response
        bare = self.base.response

        if not negotiated.complete:
            if negotiated.http_error:
                problem = negotiated.http_error.desc
            else:
                problem = ""
            self.add_base_note('', CONNEG_SUBREQ_PROBLEM, problem=problem)
            return

        # see if it was compressed when not negotiated
        no_conneg_vary_headers = bare.parsed_headers.get('vary', [])
        if 'gzip' in bare.parsed_headers.get('content-encoding', []) \
          or 'x-gzip' in bare.parsed_headers.get('content-encoding', []):
            self.add_base_note('header-vary header-content-encoding',
                               CONNEG_GZIP_WITHOUT_ASKING)
        if 'gzip' not in negotiated.parsed_headers.get('content-encoding', []) \
          and 'x-gzip' not in negotiated.parsed_headers.get('content-encoding', []):
            self.base.gzip_support = False
        else:  # Apparently, content negotiation is happening.
            # check status
            if bare.status_code != negotiated.status_code:
                self.add_base_note('status',
                                   VARY_STATUS_MISMATCH,
                                   neg_status=negotiated.status_code,
                                   noneg_status=bare.status_code)
                return  # Can't be sure what's going on...

            # check headers that should be invariant
            for hdr in ['content-type']:
                if bare.parsed_headers.get(
                        hdr) != negotiated.parsed_headers.get(hdr, None):
                    self.add_base_note('header-%s' % hdr,
                                       VARY_HEADER_MISMATCH,
                                       header=hdr)

            # check Vary headers
            vary_headers = negotiated.parsed_headers.get('vary', [])
            if (not "accept-encoding" in vary_headers) and (not "*"
                                                            in vary_headers):
                self.add_base_note('header-vary', CONNEG_NO_VARY)
            if no_conneg_vary_headers != vary_headers:
                self.add_base_note(
                    'header-vary',
                    VARY_INCONSISTENT,
                    conneg_vary=", ".join(vary_headers),
                    no_conneg_vary=", ".join(no_conneg_vary_headers))

            # check body
            if bare.decoded_md5 != negotiated.payload_md5:
                self.add_base_note('body', VARY_BODY_MISMATCH)

            # check ETag
            if bare.parsed_headers.get('etag',
                                       1) == negotiated.parsed_headers.get(
                                           'etag', 2):
                if not self.base.response.parsed_headers['etag'][0]:  # strong
                    self.add_base_note('header-etag', VARY_ETAG_DOESNT_CHANGE)

            # check compression efficiency
            if negotiated.payload_len > 0 and bare.payload_len > 0:
                savings = int(
                    100 * ((float(bare.payload_len) - negotiated.payload_len) /
                           bare.payload_len))
            elif negotiated.payload_len > 0 and bare.payload_len == 0:
                # weird.
                return
            else:
                savings = 0
            self.base.gzip_support = True
            self.base.gzip_savings = savings
            if savings >= 0:
                self.add_base_note('header-content-encoding',
                                   CONNEG_GZIP_GOOD,
                                   savings=savings,
                                   orig_size=f_num(bare.payload_len),
                                   gzip_size=f_num(negotiated.payload_len))
            else:
                self.add_base_note('header-content-encoding',
                                   CONNEG_GZIP_BAD,
                                   savings=abs(savings),
                                   orig_size=f_num(bare.payload_len),
                                   gzip_size=f_num(negotiated.payload_len))
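The savings figure at the end is simply the percentage of bytes saved by compression, negative when gzip makes the body larger. A standalone sketch of the calculation:

    def gzip_savings(uncompressed_len: int, gzipped_len: int) -> int:
        # Percentage of bytes saved; negative means compression hurt.
        if uncompressed_len <= 0:
            return 0
        return int(100 * (float(uncompressed_len) - gzipped_len) / uncompressed_len)

    print(gzip_savings(10000, 2500))   # 75
    print(gzip_savings(100, 120))      # -20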
Example #16
 def format_size(value: int) -> str:
     if value is None:
         return '<td>-</td>'
     return '<td>%s</td>' % f_num(value, by1024=True)
Example #17
    def done(self) -> None:
        negotiated = self.response
        bare = self.base.response

        if not negotiated.complete:
            if negotiated.http_error:
                problem = negotiated.http_error.desc
            else:
                problem = ""
            self.add_base_note('', CONNEG_SUBREQ_PROBLEM, problem=problem)
            return

        # see if it was compressed when not negotiated
        no_conneg_vary_headers = bare.parsed_headers.get('vary', [])
        if 'gzip' in bare.parsed_headers.get('content-encoding', []) \
          or 'x-gzip' in bare.parsed_headers.get('content-encoding', []):
            self.add_base_note('header-vary header-content-encoding', CONNEG_GZIP_WITHOUT_ASKING)
        if 'gzip' not in negotiated.parsed_headers.get('content-encoding', []) \
          and 'x-gzip' not in negotiated.parsed_headers.get('content-encoding', []):
            self.base.gzip_support = False
        else: # Apparently, content negotiation is happening.
            # check status
            if bare.status_code != negotiated.status_code:
                self.add_base_note('status', VARY_STATUS_MISMATCH,
                                   neg_status=negotiated.status_code,
                                   noneg_status=bare.status_code)
                return  # Can't be sure what's going on...

            # check headers that should be invariant
            for hdr in ['content-type']:
                if bare.parsed_headers.get(hdr) != negotiated.parsed_headers.get(hdr, None):
                    self.add_base_note('header-%s' % hdr, VARY_HEADER_MISMATCH, header=hdr)

            # check Vary headers
            vary_headers = negotiated.parsed_headers.get('vary', [])
            if (not "accept-encoding" in vary_headers) and (not "*" in vary_headers):
                self.add_base_note('header-vary', CONNEG_NO_VARY)
            if no_conneg_vary_headers != vary_headers:
                self.add_base_note('header-vary', VARY_INCONSISTENT,
                                   conneg_vary=", ".join(vary_headers),
                                   no_conneg_vary=", ".join(no_conneg_vary_headers))

            # check body
            if bare.payload_md5 != negotiated.decoded_md5:
                self.add_base_note('body', VARY_BODY_MISMATCH)

            # check ETag
            if bare.parsed_headers.get('etag', 1) == negotiated.parsed_headers.get('etag', 2):
                if not self.base.response.parsed_headers['etag'][0]: # strong
                    self.add_base_note('header-etag', VARY_ETAG_DOESNT_CHANGE)

            # check compression efficiency
            if negotiated.payload_len > 0 and bare.payload_len > 0:
                savings = int(100 * (
                    (float(bare.payload_len) - negotiated.payload_len) / bare.payload_len))
            elif negotiated.payload_len > 0 and bare.payload_len == 0:
                # weird.
                return
            else:
                savings = 0
            self.base.gzip_support = True
            self.base.gzip_savings = savings
            if savings >= 0:
                self.add_base_note('header-content-encoding', CONNEG_GZIP_GOOD,
                                   savings=savings,
                                   orig_size=f_num(bare.payload_len),
                                   gzip_size=f_num(negotiated.payload_len))
            else:
                self.add_base_note('header-content-encoding', CONNEG_GZIP_BAD,
                                   savings=abs(savings),
                                   orig_size=f_num(bare.payload_len),
                                   gzip_size=f_num(negotiated.payload_len))
Example #18
    def process(
        self, headers: RawHeaderListType
    ) -> Tuple[StrHeaderListType, HeaderDictType]:
        """
        Given a list of (bytes name, bytes value) headers and:
         - calculate the total header block size
         - call msg.add_note as appropriate
        Returns:
         - a list of unicode header tuples
         - a dict of parsed header values
        """
        unicode_headers = []  # unicode version of the header tuples
        parsed_headers = {}  # dictionary of parsed header values
        offset = 0  # what number header we're on

        # estimate the start-lines size
        header_block_size = len(self.message.version)
        if self.message.is_request:
            header_block_size += len(self.message.method) + len(self.message.uri) + 2
        else:
            header_block_size += len(self.message.status_phrase) + 5

        for name, value in headers:
            offset += 1
            add_note = partial(self.message.add_note, "offset-%s" % offset)

            # track header size
            header_size = len(name) + len(value)
            header_block_size += header_size

            # decode the header to make it unicode clean
            try:
                str_name = name.decode("ascii", "strict")
            except UnicodeError:
                str_name = name.decode("ascii", "ignore")
                add_note(HEADER_NAME_ENCODING, field_name=str_name)
            try:
                str_value = value.decode("ascii", "strict")
            except UnicodeError:
                str_value = value.decode("iso-8859-1", "replace")
                add_note(HEADER_VALUE_ENCODING, field_name=str_name)
            unicode_headers.append((str_name, str_value))

            header_handler = self.get_header_handler(str_name)
            field_add_note = partial(
                add_note,  # type: ignore
                field_name=header_handler.canonical_name,
            )
            header_handler.handle_input(str_value, field_add_note)

            if header_size > MAX_HDR_SIZE:
                add_note(
                    HEADER_TOO_LARGE,
                    field_name=header_handler.canonical_name,
                    header_size=f_num(header_size),
                )

        # check each of the complete header values and get the parsed value
        for header_name, header_handler in list(self._header_handlers.items()):
            header_add_note = partial(
                self.message.add_note,
                "header-%s" % header_handler.canonical_name.lower(),
                field_name=header_handler.canonical_name,
            )
            header_handler.finish(self.message, header_add_note)  # type: ignore
            parsed_headers[header_handler.norm_name] = header_handler.value

        return unicode_headers, parsed_headers
Example #19
def checkCaching(response: HttpResponse, request: HttpRequest = None) -> None:
    "Examine HTTP caching characteristics."

    # get header values
    lm_hdr = response.parsed_headers.get("last-modified", None)
    date_hdr = response.parsed_headers.get("date", None)
    expires_hdr = response.parsed_headers.get("expires", None)
    etag_hdr = response.parsed_headers.get("etag", None)
    age_hdr = response.parsed_headers.get("age", None)
    cc_set = response.parsed_headers.get("cache-control", [])
    cc_list = [k for (k, v) in cc_set]
    cc_dict = dict(cc_set)
    cc_keys = list(cc_dict.keys())

    # Last-Modified
    if lm_hdr:
        serv_date = date_hdr or response.start_time
        if lm_hdr > serv_date:
            response.add_note("header-last-modified", LM_FUTURE)
        else:
            response.add_note(
                "header-last-modified",
                LM_PRESENT,
                last_modified_string=relative_time(lm_hdr, serv_date),
            )

    # known Cache-Control directives that don't allow duplicates
    known_cc = [
        "max-age",
        "no-store",
        "s-maxage",
        "public",
        "private",
        "pre-check",
        "post-check",
        "stale-while-revalidate",
        "stale-if-error",
    ]

    # check for mis-capitalised directives /
    # assure there aren't any dup directives with different values
    for cc in cc_keys:
        if cc.lower() in known_cc and cc != cc.lower():
            response.add_note("header-cache-control",
                              CC_MISCAP,
                              cc_lower=cc.lower(),
                              cc=cc)
        if cc in known_cc and cc_list.count(cc) > 1:
            response.add_note("header-cache-control", CC_DUP, cc=cc)

    # Who can store this?
    if request and request.method not in cacheable_methods:
        response.store_shared = response.store_private = False
        request.add_note("method", METHOD_UNCACHEABLE, method=request.method)
        return  # bail; nothing else to see here
    if "no-store" in cc_keys:
        response.store_shared = response.store_private = False
        response.add_note("header-cache-control", NO_STORE)
        return  # bail; nothing else to see here
    if "private" in cc_keys:
        response.store_shared = False
        response.store_private = True
        response.add_note("header-cache-control", PRIVATE_CC)
    elif (request
          and "authorization" in [k.lower() for k, v in request.headers]
          and "public" not in cc_keys):
        response.store_shared = False
        response.store_private = True
        response.add_note("header-cache-control", PRIVATE_AUTH)
    else:
        response.store_shared = response.store_private = True
        response.add_note("header-cache-control", STOREABLE)

    # no-cache?
    if "no-cache" in cc_keys:
        if lm_hdr is None and etag_hdr is None:
            response.add_note("header-cache-control", NO_CACHE_NO_VALIDATOR)
        else:
            response.add_note("header-cache-control", NO_CACHE)
        return

    # pre-check / post-check
    if "pre-check" in cc_keys or "post-check" in cc_keys:
        if "pre-check" not in cc_keys or "post-check" not in cc_keys:
            response.add_note("header-cache-control", CHECK_SINGLE)
        else:
            pre_check = post_check = None
            try:
                pre_check = int(cc_dict["pre-check"])
                post_check = int(cc_dict["post-check"])
            except ValueError:
                response.add_note("header-cache-control", CHECK_NOT_INTEGER)
            if pre_check is not None and post_check is not None:
                if pre_check == 0 and post_check == 0:
                    response.add_note("header-cache-control", CHECK_ALL_ZERO)
                elif post_check > pre_check:
                    response.add_note("header-cache-control",
                                      CHECK_POST_BIGGER)
                    post_check = pre_check
                elif post_check == 0:
                    response.add_note("header-cache-control", CHECK_POST_ZERO)
                else:
                    response.add_note(
                        "header-cache-control",
                        CHECK_POST_PRE,
                        pre_check=pre_check,
                        post_check=post_check,
                    )

    # vary?
    vary = response.parsed_headers.get("vary", set())
    if "*" in vary:
        response.add_note("header-vary", VARY_ASTERISK)
        return  # bail; nothing else to see here
    if len(vary) > 3:
        response.add_note("header-vary",
                          VARY_COMPLEX,
                          vary_count=f_num(len(vary)))
    else:
        if "user-agent" in vary:
            response.add_note("header-vary", VARY_USER_AGENT)
        if "host" in vary:
            response.add_note("header-vary", VARY_HOST)

    # calculate age
    response.age = age_hdr or 0
    age_str = relative_time(response.age, 0, 0)
    if date_hdr and date_hdr > 0:
        apparent_age = max(0, int(response.start_time - date_hdr))
    else:
        apparent_age = 0
    current_age = max(apparent_age, response.age)
    current_age_str = relative_time(current_age, 0, 0)
    if response.age >= 1:
        response.add_note("header-age header-date", CURRENT_AGE, age=age_str)

    # Check for clock skew and dateless origin server.
    if not date_hdr:
        response.add_note("", DATE_CLOCKLESS)
        if expires_hdr or lm_hdr:
            response.add_note("header-expires header-last-modified",
                              DATE_CLOCKLESS_BAD_HDR)
    else:
        skew = date_hdr - response.start_time + (response.age)
        if response.age > max_clock_skew and (current_age -
                                              skew) < max_clock_skew:
            response.add_note("header-date header-age", AGE_PENALTY)
        elif abs(skew) > max_clock_skew:
            response.add_note(
                "header-date",
                DATE_INCORRECT,
                clock_skew_string=relative_time(skew, 0, 2),
            )
        else:
            response.add_note("header-date", DATE_CORRECT)

    # calculate freshness
    freshness_lifetime = 0
    has_explicit_freshness = False
    has_cc_freshness = False
    freshness_hdrs = ["header-date"]
    if "s-maxage" in cc_keys:
        freshness_lifetime = cc_dict["s-maxage"]
        freshness_hdrs.append("header-cache-control")
        has_explicit_freshness = True
        has_cc_freshness = True
    elif "max-age" in cc_keys:
        freshness_lifetime = cc_dict["max-age"]
        freshness_hdrs.append("header-cache-control")
        has_explicit_freshness = True
        has_cc_freshness = True
    elif "expires" in response.parsed_headers:
        # An invalid Expires header means it's automatically stale
        has_explicit_freshness = True
        freshness_hdrs.append("header-expires")
        freshness_lifetime = (expires_hdr or 0) - (date_hdr
                                                   or int(response.start_time))

    freshness_left = freshness_lifetime - current_age
    freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
    freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)

    response.freshness_lifetime = freshness_lifetime
    fresh = freshness_left > 0
    if has_explicit_freshness:
        if fresh:
            response.add_note(
                " ".join(freshness_hdrs),
                FRESHNESS_FRESH,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )
        elif has_cc_freshness and response.age > freshness_lifetime:
            response.add_note(
                " ".join(freshness_hdrs),
                FRESHNESS_STALE_CACHE,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )
        else:
            response.add_note(
                " ".join(freshness_hdrs),
                FRESHNESS_STALE_ALREADY,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )

    # can heuristic freshness be used?
    elif response.status_code in heuristic_cacheable_status:
        response.add_note("header-last-modified", FRESHNESS_HEURISTIC)
    else:
        response.add_note("", FRESHNESS_NONE)

    # can stale responses be served?
    if "must-revalidate" in cc_keys:
        if fresh:
            response.add_note("header-cache-control", FRESH_MUST_REVALIDATE)
        elif has_explicit_freshness:
            response.add_note("header-cache-control", STALE_MUST_REVALIDATE)
    elif "proxy-revalidate" in cc_keys or "s-maxage" in cc_keys:
        if fresh:
            response.add_note("header-cache-control", FRESH_PROXY_REVALIDATE)
        elif has_explicit_freshness:
            response.add_note("header-cache-control", STALE_PROXY_REVALIDATE)
    else:
        if fresh:
            response.add_note("header-cache-control", FRESH_SERVABLE)
        elif has_explicit_freshness:
            response.add_note("header-cache-control", STALE_SERVABLE)

    # public?
    if "public" in cc_keys:  # TODO: check for authentication in request
        response.add_note("header-cache-control", PUBLIC)
Example #20
 def format_size(self, value):
     if value is None:
         return u'<td>-</td>'
     else:
         return u'<td>%s</td>' % f_num(value, by1024=True)
Example #21
 def format_size(value: int) -> str:
     if value is None:
         return '<td>-</td>'
     else:
         return '<td>%s</td>' % f_num(value, by1024=True)
Example #22
def checkCaching(response: HttpResponse, request: HttpRequest=None) -> None:
    "Examine HTTP caching characteristics."

    # get header values
    lm_hdr = response.parsed_headers.get('last-modified', None)
    date_hdr = response.parsed_headers.get('date', None)
    expires_hdr = response.parsed_headers.get('expires', None)
    etag_hdr = response.parsed_headers.get('etag', None)
    age_hdr = response.parsed_headers.get('age', None)
    cc_set = response.parsed_headers.get('cache-control', [])
    cc_list = [k for (k, v) in cc_set]
    cc_dict = dict(cc_set)
    cc_keys = list(cc_dict.keys())

    # Last-Modified
    if lm_hdr:
        serv_date = date_hdr or response.start_time
        if lm_hdr > serv_date:
            response.add_note('header-last-modified', LM_FUTURE)
        else:
            response.add_note('header-last-modified', LM_PRESENT,
                              last_modified_string=relative_time(lm_hdr, serv_date))

    # known Cache-Control directives that don't allow duplicates
    known_cc = ["max-age", "no-store", "s-maxage", "public",
                "private", "pre-check", "post-check",
                "stale-while-revalidate", "stale-if-error"]

    # check for mis-capitalised directives /
    # assure there aren't any dup directives with different values
    for cc in cc_keys:
        if cc.lower() in known_cc and cc != cc.lower():
            response.add_note('header-cache-control', CC_MISCAP,
                              cc_lower=cc.lower(), cc=cc)
        if cc in known_cc and cc_list.count(cc) > 1:
            response.add_note('header-cache-control', CC_DUP, cc=cc)

    # Who can store this?
    if request and request.method not in cacheable_methods:
        response.store_shared = response.store_private = False
        request.add_note('method', METHOD_UNCACHEABLE, method=request.method)
        return # bail; nothing else to see here
    elif 'no-store' in cc_keys:
        response.store_shared = response.store_private = False
        response.add_note('header-cache-control', NO_STORE)
        return # bail; nothing else to see here
    elif 'private' in cc_keys:
        response.store_shared = False
        response.store_private = True
        response.add_note('header-cache-control', PRIVATE_CC)
    elif request and 'authorization' in [k.lower() for k, v in request.headers] \
      and 'public' not in cc_keys:
        response.store_shared = False
        response.store_private = True
        response.add_note('header-cache-control', PRIVATE_AUTH)
    else:
        response.store_shared = response.store_private = True
        response.add_note('header-cache-control', STOREABLE)

    # no-cache?
    if 'no-cache' in cc_keys:
        if lm_hdr is None and etag_hdr is None:
            response.add_note('header-cache-control', NO_CACHE_NO_VALIDATOR)
        else:
            response.add_note('header-cache-control', NO_CACHE)
        return

    # pre-check / post-check
    if 'pre-check' in cc_keys or 'post-check' in cc_keys:
        if 'pre-check' not in cc_keys or 'post-check' not in cc_keys:
            response.add_note('header-cache-control', CHECK_SINGLE)
        else:
            pre_check = post_check = None
            try:
                pre_check = int(cc_dict['pre-check'])
                post_check = int(cc_dict['post-check'])
            except ValueError:
                response.add_note('header-cache-control', CHECK_NOT_INTEGER)
            if pre_check is not None and post_check is not None:
                if pre_check == 0 and post_check == 0:
                    response.add_note('header-cache-control', CHECK_ALL_ZERO)
                elif post_check > pre_check:
                    response.add_note('header-cache-control', CHECK_POST_BIGGER)
                    post_check = pre_check
                elif post_check == 0:
                    response.add_note('header-cache-control', CHECK_POST_ZERO)
                else:
                    response.add_note('header-cache-control', CHECK_POST_PRE,
                                      pre_check=pre_check, post_check=post_check)

    # vary?
    vary = response.parsed_headers.get('vary', set())
    if "*" in vary:
        response.add_note('header-vary', VARY_ASTERISK)
        return # bail; nothing else to see here
    elif len(vary) > 3:
        response.add_note('header-vary', VARY_COMPLEX, vary_count=f_num(len(vary)))
    else:
        if "user-agent" in vary:
            response.add_note('header-vary', VARY_USER_AGENT)
        if "host" in vary:
            response.add_note('header-vary', VARY_HOST)
        # TODO: enumerate the axes in a message

    # calculate age
    response.age = age_hdr or 0
    age_str = relative_time(response.age, 0, 0)
    if date_hdr and date_hdr > 0:
        apparent_age = max(0, int(response.start_time - date_hdr))
    else:
        apparent_age = 0
    current_age = max(apparent_age, response.age)
    current_age_str = relative_time(current_age, 0, 0)
    if response.age >= 1:
        response.add_note('header-age header-date', CURRENT_AGE, age=age_str)

    # Check for clock skew and dateless origin server.
    if not date_hdr:
        response.add_note('', DATE_CLOCKLESS)
        if expires_hdr or lm_hdr:
            response.add_note('header-expires header-last-modified', DATE_CLOCKLESS_BAD_HDR)
    else:
        skew = date_hdr - response.start_time + (response.age)
        if response.age > max_clock_skew and (current_age - skew) < max_clock_skew:
            response.add_note('header-date header-age', AGE_PENALTY)
        elif abs(skew) > max_clock_skew:
            response.add_note('header-date', DATE_INCORRECT,
                              clock_skew_string=relative_time(skew, 0, 2))
        else:
            response.add_note('header-date', DATE_CORRECT)

    # calculate freshness
    freshness_lifetime = 0
    has_explicit_freshness = False
    has_cc_freshness = False
    freshness_hdrs = ['header-date']
    if 's-maxage' in cc_keys:
        freshness_lifetime = cc_dict['s-maxage']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif 'max-age' in cc_keys:
        freshness_lifetime = cc_dict['max-age']
        freshness_hdrs.append('header-cache-control')
        has_explicit_freshness = True
        has_cc_freshness = True
    elif 'expires' in response.parsed_headers:
        # An invalid Expires header means it's automatically stale
        has_explicit_freshness = True
        freshness_hdrs.append('header-expires')
        freshness_lifetime = (expires_hdr or 0) - (date_hdr or response.start_time)

    freshness_left = freshness_lifetime - current_age
    freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
    freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)

    response.freshness_lifetime = freshness_lifetime
    fresh = freshness_left > 0
    if has_explicit_freshness:
        if fresh:
            response.add_note(" ".join(freshness_hdrs), FRESHNESS_FRESH,
                              freshness_lifetime=freshness_lifetime_str,
                              freshness_left=freshness_left_str,
                              current_age=current_age_str)
        # FIXME: response.age = None
        elif has_cc_freshness and response.age > freshness_lifetime:
            response.add_note(" ".join(freshness_hdrs), FRESHNESS_STALE_CACHE,
                              freshness_lifetime=freshness_lifetime_str,
                              freshness_left=freshness_left_str,
                              current_age=current_age_str)
        else:
            response.add_note(" ".join(freshness_hdrs), FRESHNESS_STALE_ALREADY,
                              freshness_lifetime=freshness_lifetime_str,
                              freshness_left=freshness_left_str,
                              current_age=current_age_str)

    # can heuristic freshness be used?
    elif response.status_code in heuristic_cacheable_status:
        response.add_note('header-last-modified', FRESHNESS_HEURISTIC)
    else:
        response.add_note('', FRESHNESS_NONE)

    # can stale responses be served?
    if 'must-revalidate' in cc_keys:
        if fresh:
            response.add_note('header-cache-control', FRESH_MUST_REVALIDATE)
        elif has_explicit_freshness:
            response.add_note('header-cache-control', STALE_MUST_REVALIDATE)
    elif 'proxy-revalidate' in cc_keys or 's-maxage' in cc_keys:
        if fresh:
            response.add_note('header-cache-control', FRESH_PROXY_REVALIDATE)
        elif has_explicit_freshness:
            response.add_note('header-cache-control', STALE_PROXY_REVALIDATE)
    else:
        if fresh:
            response.add_note('header-cache-control', FRESH_SERVABLE)
        elif has_explicit_freshness:
            response.add_note('header-cache-control', STALE_SERVABLE)

    # public?
    if 'public' in cc_keys: # TODO: check for authentication in request
        response.add_note('header-cache-control', PUBLIC)
Example #23
def process_headers(msg):
    """
    Parse and check the message for obvious syntactic errors,
    as well as semantic errors that are self-contained (i.e.,
    it can be determined without examining other headers, etc.).

    Using msg.headers, it populates:
      - .headers with a Unicode version of the input
      - .parsed_headers with a dictionary of parsed header values
    """

    hdr_dict = {}
    header_block_size = len(msg.version)
    if msg.is_request:
        header_block_size += len(msg.method) + len(msg.uri) + 2
    else:
        header_block_size += len(msg.status_phrase) + 5
    clean_hdrs = []      # unicode version of the header tuples
    parsed_hdrs = {}     # dictionary of parsed header values
    offset = 0
    for name, value in msg.headers:
        offset += 1
        subject = "offset-%s" % offset
        hdr_size = len(name) + len(value)
        if hdr_size > MAX_HDR_SIZE:
            msg.add_note(subject, rs.HEADER_TOO_LARGE,
               header_name=name, header_size=f_num(hdr_size))
        header_block_size += hdr_size
        
        # decode the header to make it unicode clean
        try:
            name = name.decode('ascii', 'strict')
        except UnicodeError:
            name = name.decode('ascii', 'ignore')
            msg.add_note(subject, rs.HEADER_NAME_ENCODING,
                header_name=name)
        try:
            value = value.decode('ascii', 'strict')
        except UnicodeError:
            value = value.decode('iso-8859-1', 'replace')
            msg.add_note(subject, rs.HEADER_VALUE_ENCODING,
                header_name=name)
        clean_hdrs.append((name, value))
        msg.set_context(field_name=name)
        
        # check field name syntax
        if not re.match("^\s*%s\s*$" % syntax.TOKEN, name, re.VERBOSE):
            msg.add_note(subject, rs.FIELD_NAME_BAD_SYNTAX)
            continue

        norm_name = name.lower()
        value = value.strip()
        
        hdr_parse = load_header_func(norm_name, 'parse')
        if hdr_parse:
            if hasattr(hdr_parse, 'pre_parse'):
                values = hdr_parse.pre_parse(value)
            else:
                values = [value]
            for value in values:
                if not hdr_dict.has_key(norm_name):
                    hdr_dict[norm_name] = (name, [])
                parsed_value = hdr_parse(subject, value, msg)
                if parsed_value != None:
                    hdr_dict[norm_name][1].append(parsed_value)
        
    # replace the original header tuple with ones that are clean unicode
    msg.headers = clean_hdrs

    # join parsed header values
    for norm_name, (orig_name, values) in hdr_dict.items():
        msg.set_context(field_name=orig_name)
        hdr_join = load_header_func(norm_name, 'join')
        if hdr_join:
            subject = "header-%s" % norm_name
            joined_value = hdr_join(subject, values, msg)
            if joined_value == None:
                continue
            parsed_hdrs[norm_name] = joined_value
    msg.parsed_headers = parsed_hdrs

    # check the total header block size
    if header_block_size > MAX_TTL_HDR:
        msg.add_note('header', rs.HEADER_BLOCK_TOO_LARGE,
            header_block_size=f_num(header_block_size))