Exemple #1
0
def _parse_response_heading(req, stream):
    """Parse a status line and header fields from `stream` into a response.

    :param req:
        The request that the parsed response gets attached to
        (stored in its ``request`` attribute).
    :param stream:
        The parser stream to consume the response heading from.
    :return:
        A `Response` with ``body=None`` and a remark naming the stream
        and the offset at which the heading started.
        If a `ParseError` is raised while consuming the heading,
        complaint 1009 is filed on the stream, ``stream.sane`` is set
        to `False`, and `Unavailable` is returned instead.
    """
    start_offset = stream.point
    try:
        with stream:
            # Status line: version SP status-code SP reason-phrase.
            http_version = HTTPVersion(
                stream.consume_regex(rfc7230.HTTP_version))
            stream.consume_regex(SP)
            status_code = StatusCode(
                stream.consume_regex(rfc7230.status_code))
            stream.consume_regex(SP)
            reason_phrase = stream.consume_regex(rfc7230.reason_phrase)
            _parse_line_ending(stream)
            header_entries = parse_header_fields(stream)
    except ParseError as exc:
        # The heading could not be parsed: report it and give up.
        stream.complain(1009, error=exc)
        stream.sane = False
        return Unavailable

    origin = u'from %s, offset %d' % (stream.name, start_offset)
    response = Response(http_version, status_code, reason_phrase,
                        header_entries, body=None, remark=origin)
    response.request = req
    # NOTE(review): presumably this forwards complaints collected on the
    # stream to the response -- confirm against ``dump_complaints``.
    stream.dump_complaints(response.complain, place=u'response heading')
    return response
Exemple #2
0
 def _from_record(self, record):
     """Turn one registry ``record`` element into a status code entry.

     :param record:
         An XML element whose ``iana:value`` and ``iana:description``
         children are looked up using the ``self.xmlns`` namespace map.
     :return:
         A dict with the keys ``key``, ``citation`` and ``title``,
         or `None` if the value text is not made up of digits only
         or the description is "unassigned" (compared case-insensitively).
     """
     code_text = record.find('iana:value', self.xmlns).text
     if code_text.isdigit():
         title = record.find('iana:description', self.xmlns).text
         if title.lower() != 'unassigned':
             return {
                 'key': StatusCode(code_text),
                 'citation': self.extract_citation(record),
                 'title': title,
             }
     # Either not a single numeric code, or not assigned to anything.
     return None
Exemple #3
0
def _process_response(data, req, creator, path):
    """Build a `Response` from the response part of a HAR entry.

    :param data:
        The response data as a dict. This function reads its ``status``,
        ``statusText``, ``bodySize`` and ``content`` keys
        (and ``size``, ``text``, ``encoding`` inside ``content``).
    :param req:
        The corresponding request; its ``method`` is consulted
        to recognize CONNECT exchanges.
    :param creator:
        Identifies the program that exported the data;
        it is tested for membership in `FIDDLER` and `FIREFOX`.
    :param path:
        Where the data came from; used only in the response's remark.
    :return:
        The new `Response`, or `None` if ``data['status']`` is 0.
    """
    if data['status'] == 0:
        # This is how Chrome indicates an error.
        return None

    http_version, header_entries = _process_message(data, creator)
    status_code = StatusCode(data['status'])
    reason_phrase = data['statusText']

    # A successful CONNECT response exported by Fiddler needs special care,
    # both for its headers (here) and for its body (further below).
    fiddler_tunnel = (creator in FIDDLER and req.method == m.CONNECT and
                      status_code.successful)
    if fiddler_tunnel:
        # Fiddler's HAR export appends debug headers to such responses
        # after the tunnel is closed; drop them.
        debug_names = [u'EndTime', u'ClientToServerBytes',
                       u'ServerToClientBytes']
        kept_entries = []
        for (name, value) in header_entries:
            if name not in debug_names:
                kept_entries.append((name, value))
        header_entries = kept_entries

    # Decide what is known about the body before looking at its text.
    # Firefox includes a body even on 304, hence the explicit status check.
    known_empty = (data['bodySize'] == 0 or data['content']['size'] == 0 or
                   status_code == st.not_modified)
    payload = None
    if known_empty:
        payload = b''
    elif creator not in FIREFOX and (data['bodySize'] > 0 or
                                     data['content']['size'] > 0):
        # Firefox is excluded because it seems to export a bogus bodySize
        # (see test/har_data/firefox_gif.har). Other browsers may set
        # ``bodySize = -1`` even when ``content.size >= 0``,
        # so either positive size means there is a body we do not have.
        payload = Unavailable()

    response = Response(http_version, status_code, reason_phrase,
                        header_entries, body=payload,
                        remark=u'from %s' % path)

    content = data['content']
    text = content.get('text')
    if not (text and status_code != st.not_modified):
        return response

    if content.get('encoding', u'').lower() == u'base64':
        try:
            raw_body = base64.b64decode(text)
        except ValueError:
            # Undecodable text: leave the body as decided above.
            return response
        if fiddler_tunnel and b'Fiddler' in raw_body:
            # Fiddler's HAR export adds a body with debug information
            # to CONNECT responses.
            response.body = b''
        else:
            response.decoded_body = raw_body
    elif 'encoding' not in content:
        response.unicode_body = text

    return response
Exemple #4
0
def _parse_response_heading(req, stream):
    """Read a status line and header fields from `stream` into a response.

    :param req:
        The request that the parsed response gets attached to
        (stored in its ``request`` attribute).
    :param stream:
        The stream to read the response heading from.
    :return:
        A `Response` with ``body=None`` and a remark naming the stream
        and the offset at which the heading started.
    :raises:
        Whatever ``stream.error(offset)`` produces, when the first line
        does not split into three space-separated pieces, or its first
        two pieces do not match `HTTP_VERSION` and `STATUS_CODE`.
    """
    start_offset = stream.tell()
    with stream.parsing(status_line):
        # At most two splits: the reason phrase may itself contain spaces.
        fields = stream.readline().split(u' ', 2)
        well_formed = (len(fields) == 3 and
                       HTTP_VERSION.match(fields[0]) and
                       STATUS_CODE.match(fields[1]))
        if not well_formed:
            raise stream.error(start_offset)

    http_version = HTTPVersion(fields[0])
    status_code = StatusCode(fields[1])
    reason_phrase = fields[2]
    header_entries = parse_header_fields(stream)
    with stream.parsing(HTTP_message):
        # Consume the line ending that follows the header fields.
        stream.readlineend()

    origin = u'from %s, offset %d' % (stream.name, start_offset)
    response = Response(http_version, status_code, reason_phrase,
                        header_entries, body=None, remark=origin)
    response.request = req
    # NOTE(review): presumably this forwards complaints collected on the
    # stream to the response -- confirm against ``dump_complaints``.
    stream.dump_complaints(response.complain, place=u'response heading')
    return response
Exemple #5
0
def _process_response(data, req, creator, path):
    """Build a `Response` from the response part of a HAR entry.

    :param data:
        The response data as a dict. This function reads its ``status``,
        ``statusText``, ``bodySize`` and ``content`` keys
        (and ``size``, ``text``, ``encoding`` inside ``content``).
    :param req:
        The corresponding request; its ``method`` and ``version``
        are consulted.
    :param creator:
        Describes the program that exported the data, via its
        ``is_firefox``, ``is_fiddler`` and ``is_chrome`` attributes.
    :param path:
        Where the data came from; used only in the response's remark.
    :return:
        The new `Response`, or `None` if ``data['status']`` is 0.
    """
    if data['status'] == 0:
        # This is how Chrome indicates an error.
        return None

    (http_version, header_entries, _unused) = \
        _process_message(data, creator)
    status_code = StatusCode(data['status'])
    reason_phrase = data['statusText']

    if creator.is_firefox:
        # Firefox joins all ``Set-Cookie`` response fields with newlines,
        # so split them back into separate entries.
        # (It also joins other fields with commas,
        # but that is permitted by RFC 7230 Section 3.2.2.)
        expanded_entries = []
        for (name, joined_value) in header_entries:
            if name == h.set_cookie:
                values = joined_value.split(u'\n')
            else:
                values = [joined_value]
            for value in values:
                expanded_entries.append((name, value))
        header_entries = expanded_entries

    # A successful CONNECT response exported by Fiddler needs special care,
    # both for its headers (here) and for its body (further below).
    fiddler_tunnel = (creator.is_fiddler and req.method == m.CONNECT and
                      status_code.successful)
    if fiddler_tunnel:
        # Fiddler's HAR export appends debug headers to such responses
        # after the tunnel is closed; drop them.
        debug_names = [u'EndTime', u'ClientToServerBytes',
                       u'ServerToClientBytes']
        kept_entries = []
        for (name, value) in header_entries:
            if name not in debug_names:
                kept_entries.append((name, value))
        header_entries = kept_entries

    # Decide what is known about the body before looking at its text.
    # Firefox includes a body even with 304 responses,
    # hence the explicit status check.
    known_empty = (data['bodySize'] == 0 or data['content']['size'] == 0 or
                   status_code == st.not_modified)
    payload = None
    if known_empty:
        payload = b''
    elif data['bodySize'] > 0 or data['content']['size'] > 0:
        # Browsers may set ``bodySize = -1`` even when ``content.size >= 0``,
        # so either positive size means there is a body we do not have.
        payload = Unavailable()

    if http_version == http11 and creator.is_firefox:
        if any(name == u'x-firefox-spdy'
               for (name, _value) in header_entries):
            # Helps with SPDY in Firefox.
            http_version = None
    if creator.is_chrome and http_version != req.version:
        # Helps with SPDY in Chrome.
        http_version = None

    response = Response(http_version, status_code, reason_phrase,
                        header_entries, body=payload,
                        remark=u'from %s' % path)

    content = data['content']
    text = content.get('text')
    if not (text and status_code != st.not_modified):
        return response

    if content.get('encoding', u'').lower() == u'base64':
        try:
            raw_body = base64.b64decode(text)
        except ValueError:
            # Firefox sometimes marks normal, unencoded text as "base64"
            # (see ``test/har_data/firefox_gif.har``).
            # But let's not try to guess: keep the body as decided above.
            return response
        if fiddler_tunnel and b'Fiddler' in raw_body:
            # Fiddler's HAR export adds a body with debug information
            # to CONNECT responses.
            response.body = b''
        else:
            response.decoded_body = raw_body
    elif 'encoding' not in content:
        response.unicode_body = text

    return response
Exemple #6
0
#     Obvious, and usually filled by ``tools/iana.py``.
#
#   ``_title``
#     The default reason phrase, usually filled by ``tools/iana.py``.
#
#   ``cacheable``
#     If the status code is defined as cacheable by default,
#     set this to ``BY_DEFAULT``.
#     If it is defined as never cacheable, set to ``NOT_AT_ALL``.
#     Otherwise, set to ``NOT_BY_DEFAULT``.

known = KnownDict(
    StatusCode,
    [
        {
            '_': StatusCode(100),
            '_citations': [RFC(7231, section=(6, 2, 1))],
            '_title': u'Continue',
            'cacheable': NOT_BY_DEFAULT
        },
        {
            '_': StatusCode(101),
            '_citations': [RFC(7231, section=(6, 2, 2))],
            '_title': u'Switching Protocols',
            'cacheable': NOT_BY_DEFAULT
        },
        {
            '_': StatusCode(102),
            '_citations': [RFC(2518)],
            '_title': u'Processing'
        },
Exemple #7
0
    def __init__(self, version, status, reason, header_entries, body,
                 trailer_entries=None, remark=None):
        """
        :param version:
            Protocol version of the response, as a Unicode string.
            Pass `None` if it is unknown (some checks are then disabled).

            If the response was sent over an HTTP/1.x connection,
            pass the HTTP version from its `status line`__,
            for example ``u'HTTP/1.0'`` or ``u'HTTP/1.1'``.

            __ https://tools.ietf.org/html/rfc7230#section-3.1.2

            If the response was sent over an HTTP/2 connection,
            pass ``u'HTTP/2'``.

        :param status:
            Status code of the response, as an integer.

        :param reason:
            Reason phrase of the response (like "OK" or "Not Found"),
            as a Unicode string. Pass `None` if it is unknown
            (as is the case in HTTP/2).

        :param header_entries:
            The headers of the response, as a (possibly empty) list.
            HTTP/2 `pseudo-headers`__ must **not** be included.

            __ https://tools.ietf.org/html/rfc7540#section-8.1.2.1

            Each item of the list must be a pair ``(name, value)``.

            `name` must be a Unicode string.

            `value` may be either a byte string or a Unicode string.
            A Unicode value is assumed by HTTPolice to have been decoded
            from ISO-8859-1 (the historic encoding of HTTP),
            and is encoded back into ISO-8859-1 before any processing.

        :param body:
            Payload body of the response, as a **byte string**.
            Pass `None` if it is unknown (some checks are then disabled).

            For a response without a payload (like 204 or 304 responses),
            pass the empty string ``b''``.

            This is the payload body as `defined by RFC 7230`__, that is,
            **after** any ``Transfer-Encoding`` (like ``chunked``)
            has been removed, but **before** any ``Content-Encoding``
            (like ``gzip``) has been removed.

            __ https://tools.ietf.org/html/rfc7230#section-3.3

        :param trailer_entries:
            The headers from the trailer part of the response
            (as found in `chunked coding`__ or `HTTP/2`__), as a list.
            Pass `None` if there is no trailer part.

            __ https://tools.ietf.org/html/rfc7230#section-4.1.2
            __ https://tools.ietf.org/html/rfc7540#section-8.1

            The list has the same format as `header_entries`.

        :param remark:
            A Unicode string to show above this response in HTML reports
            (when the appropriate option is enabled), or `None`.
            It can be used, for example, to identify the source of the data:
            ``u'from somefile.dat, offset 1337'``.

        """
        super(Response, self).__init__(
            version, header_entries, body, trailer_entries, remark)
        self.status = StatusCode(status)
        # The reason phrase is optional; only coerce it when it is given.
        if reason is None:
            self.reason = None
        else:
            self.reason = force_unicode(reason)
        # No request is associated with the response at construction time.
        self.request = None