Exemple #1
0
    def __init__(self, kind=2, decompress=False):
        self.kind = kind
        self.decompress = decompress

        # errors vars
        self.errno = None
        self.errstr = ""

        # protected variables
        self._buf = []
        self._version = None
        self._method = None
        self._status_code = None
        self._status = None
        self._reason = None
        self._url = None
        self._path = None
        self._query_string = None
        self._fragment= None
        self._headers = IOrderedDict()
        self._environ = dict()
        self._chunked = False
        self._body = []
        self._trailers = None
        self._partial_body = False
        self._clen = None
        self._clen_rest = None

        # private events
        self.__on_firstline = False
        self.__on_headers_complete = False
        self.__on_message_begin = False
        self.__on_message_complete = False

        self.__decompress_obj = None
class HttpParser(object):
    def __init__(self, kind=2, decompress=False):
        self.kind = kind
        self.decompress = decompress

        # errors vars
        self.errno = None
        self.errstr = ""

        # protected variables

        # containers
        self._buf = ""
        self._body = ""
        self._environ = dict()
        self._headers = IOrderedDict()

        # variables
        self._version = None
        self._method = None
        self._status_code = None
        self._status = None
        self._reason = None
        self._url = None
        self._path = None
        self._query_string = None
        self._fragment = None

        # flags
        self._chunked = False
        self._have_trailer = False
        self._have_body = False
        self.nb_parsed = 0
        self._clen = 0

        # private events
        self.__on_firstline = False
        self.__on_headers_complete = False
        self.__on_message_begin = False
        self.__on_message_complete = False

        self.__decompress_obj = None
        self.__decompress_first_try = True

    def get_version(self):
        return self._version

    def get_method(self):
        return self._method

    def get_status_code(self):
        return self._status_code

    def get_reason(self):
        return self._reason

    def get_url(self):
        return self._url

    def get_path(self):
        return self._path

    def get_query_string(self):
        return self._query_string

    def get_fragment(self):
        return self._fragment

    def get_headers(self):
        return self._headers

    def get_wsgi_environ(self):
        if not self.__on_headers_complete:
            return None

        environ = self._environ.copy()
        # clean special keys
        for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"):
            hkey = "HTTP_%s" % key
            if hkey in environ:
                environ[key] = environ.pop(hkey)

        script_name = environ.get('SCRIPT_NAME',
                                  os.environ.get("SCRIPT_NAME", ""))
        if script_name:
            path_info = self._path.split(script_name, 1)[1]
            environ.update({
                "PATH_INFO": unquote(path_info),
                "SCRIPT_NAME": script_name})
        else:
            environ['SCRIPT_NAME'] = ""

        if environ.get('HTTP_X_FORWARDED_PROTOCOL', '').lower() == "ssl":
            environ['wsgi.url_scheme'] = "https"
        elif environ.get('HTTP_X_FORWARDED_SSL', '').lower() == "on":
            environ['wsgi.url_scheme'] = "https"
        else:
            environ['wsgi.url_scheme'] = "http"

        return environ

    def get_body(self):
        """ return last chunk of the parsed body"""
        return self._body

    def is_upgrade(self):
        """ Do we get upgrade header in the request. Useful for
        websockets """
        return self._headers.get('connection', "").lower() == "upgrade"

    def is_headers_complete(self):
        """ return True if all headers have been parsed. """
        return self.__on_headers_complete

    def is_partial_body(self):
        """ return True if a chunk of body have been parsed """
        return self._have_body

    def is_message_begin(self):
        """ return True if the parsing start """
        return self.__on_message_begin

    def is_message_complete(self):
        """ return True if the parsing is done (we get EOF) """
        return self.__on_message_complete

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        return self._chunked

    def should_keep_alive(self):
        """ return True if the connection should be kept alive
        """
        hconn = self._headers.get('connection', "").lower()
        if hconn == "close":
            return False
        elif hconn == "keep-alive":
            return True
        return self._version == (1, 1)

    def execute(self, data):
        # end of body can be passed manually by putting a length of 0

        if len(data) == 0:
            self.__on_message_complete = True
            return 0

        # start to parse
        while True:
            if not self.__on_firstline:
                # try to extract first line
                idx = data.find("\r\n")
                if idx < 0:
                    self.errno = BAD_FIRST_LINE
                    self.errstr = "Invalid HTTP request/status line"
                    return BAD_FIRST_LINE
                else:
                    self.__on_firstline = True
                    first_line, self._buf = data[:idx], data[idx + 2:]
                    self.nb_parsed += idx + 2
                    data = ""
                    if not self._parse_firstline(first_line):
                        return BAD_FIRST_LINE
            elif not self.__on_headers_complete:
                try:
                    to_parse = self._buf
                    ret = self._parse_headers(to_parse)
                    self.__on_headers_complete = True
                    self.nb_parsed += len(to_parse) - ret
                except InvalidHeader as e:
                    self.errno = INVALID_HEADER
                    self.errstr = str(e)
                    return INVALID_HEADER
            elif not self.__on_message_complete:
                if not self.__on_message_begin:
                    self.__on_message_begin = True

                to_parse = self._buf
                ret = self._parse_body(to_parse)
                # this is a normal request
                if ret is None:
                    self.__on_message_complete = True
                elif ret == 0:
                    # finished parsing
                    self.__on_message_complete = True
                elif ret < 0:
                    # on error
                    return ret
                elif ret > 0:
                    # parsed a trunk, do nothing
                    pass
            else:
                return self.nb_parsed

    def _parse_firstline(self, line):
        try:
            if self.kind == 2:  # auto detect
                try:
                    self._parse_request_line(line)
                except InvalidRequestLine:
                    self._parse_response_line(line)
            elif self.kind == 1:
                self._parse_response_line(line)
            elif self.kind == 0:
                self._parse_request_line(line)
        except InvalidRequestLine as e:
            self.errno = BAD_FIRST_LINE
            self.errstr = str(e)
            return False
        return True

    def _parse_response_line(self, line):
        bits = line.split(None, 1)
        if len(bits) != 2:
            raise InvalidRequestLine(line)

        # version
        matchv = VERSION_RE.match(bits[0])
        if matchv is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[0])
        self._version = (int(matchv.group(1)), int(matchv.group(2)))

        # status
        matchs = STATUS_RE.match(bits[1])
        if matchs is None:
            raise InvalidRequestLine("Invalid status %" % bits[1])

        self._status = bits[1]
        self._status_code = int(matchs.group(1))
        self._reason = matchs.group(2)

    def _parse_request_line(self, line):
        bits = line.split(None, 2)
        if len(bits) != 3:
            raise InvalidRequestLine(line)

        # Method
        if not METHOD_RE.match(bits[0]):
            raise InvalidRequestLine("invalid Method: %s" % bits[0])
        self._method = bits[0].upper()

        # URI
        self._url = bits[1]
        parts = urlparse.urlsplit(bits[1])
        self._path = parts.path or ""
        self._query_string = parts.query or ""
        self._fragment = parts.fragment or ""

        # Version
        match = VERSION_RE.match(bits[2])
        if match is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[2])
        self._version = (int(match.group(1)), int(match.group(2)))

        # update environ
        if hasattr(self, 'environ'):
            self._environ.update({
                "PATH_INFO": self._path,
                "QUERY_STRING": self._query_string,
                "RAW_URI": self._url,
                "REQUEST_METHOD": self._method,
                "SERVER_PROTOCOL": bits[2]})

    def _parse_headers(self, data):
        idx = data.find("\r\n\r\n")
        if idx < 0:  # we don't have all headers
            raise InvalidHeader('Headers not complete')

        # Split lines on \r\n keeping the \r\n on each line
        lines = [line + "\r\n" for line in
                 data[:idx].split("\r\n")]

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
        while len(lines):
            # Parse initial header name : value pair.
            curr = lines.pop(0)
            if curr.find(":") < 0:
                raise InvalidHeader("invalid line %s" % curr.strip())
            name, value = curr.split(":", 1)
            name = name.rstrip(" \t").upper()
            if HEADER_RE.search(name):
                raise InvalidHeader("invalid header name %s" % name)

            if value.endswith("\r\n"):
                value = value[:-2]

            name, value = name.strip(), [value.lstrip()]

            # Consume value continuation lines
            while len(lines) and lines[0].startswith((" ", "\t")):
                curr = lines.pop(0)
                if curr.endswith("\r\n"):
                    curr = curr[:-2]
                value.append(curr)
            value = ''.join(value).rstrip()

            # multiple headers
            if name in self._headers:
                value = "%s, %s" % (self._headers[name], value)

            # store new header value
            self._headers[name] = value

            # update WSGI environ
            key = 'HTTP_%s' % name.upper().replace('-', '_')
            self._environ[key] = value

        # detect now if body is sent by chunks.
        clen = self._headers.get('content-length')
        te = self._headers.get('transfer-encoding', '').lower()
        if self._headers.get('Trailer'):
            self._have_trailer = True

        if clen is not None:
            try:
                self._clen = int(clen)
            except ValueError:
                pass
        else:
            self._chunked = (te == 'chunked')
            if not self._chunked:
                self._clen_rest = MAXSIZE

        # detect encoding and set decompress object
        encoding = self._headers.get('content-encoding')
        if self.decompress:
            if encoding == "gzip":
                self.__decompress_obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
                self.__decompress_first_try = False
            elif encoding == "deflate":
                self.__decompress_obj = zlib.decompressobj()

        rest = data[idx + 4:]
        self._buf = rest
        return len(rest)

    def _parse_body(self, data):
        if not self._chunked:
            complete = True
            self.__on_message_complete = True
            # if we have enough data
            if len(data) < self._clen:
                complete = False
                self._clen = len(data)
                self.errno = INVALID_BODY
                self.errstr = "HTTP body incomplete"

            body_part = data[:self._clen]
            self.nb_parsed += len(body_part)

            # maybe decompress
            if complete and body_part and self.__decompress_obj is not None:
                if not self.__decompress_first_try:
                    body_part = self.__decompress_obj.decompress(body_part)
                else:
                    try:
                        body_part = self.__decompress_obj.decompress(body_part)
                    except zlib.error:
                        self.__decompress_obj.decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
                        body_part = self.__decompress_obj.decompress(body_part)
                    self.__decompress_first_try = False

            if body_part:
                self._have_body = True
                self._body = body_part

            # we parsed enough content for this response
            # so just ignore the other data
            self._buf = ""

            return
        else:
            try:
                size, rest = self._parse_chunk_size(data)
            except InvalidChunkSize as e:
                self.errno = INVALID_CHUNK
                self.errstr = "invalid chunk size [%s]" % str(e)
                return INVALID_CHUNK

            if size == 0:
                def error_trailer():
                        self.errno = INVALID_TRAILER
                        self.errstr = "Invalid trailer"
                        return INVALID_TRAILER
                # if this response have trailer
                if self._have_trailer and rest[:2] != "\r\n":
                    idx = rest.find("\r\n\r\n")
                    if idx != -1:
                        try:
                            self._parse_headers(rest[:idx + 4])
                            self.nb_parsed += idx + 4
                            return 0
                        except InvalidHeader:
                            return error_trailer()
                    else:
                        return error_trailer()
                if rest[:2] == '\r\n':
                    self.nb_parsed += 2
                    self._buf = ""
                    return 0
                else:
                    return error_trailer()

            if size is None or len(rest) < size:
                self.errno = INVALID_CHUNK
                self.errstr = "Invalid trunk size"
                return INVALID_CHUNK

            body_part, rest = rest[:size], rest[size:]
            if len(rest) < 2:
                self.errno = INVALID_CHUNK
                self.errstr = "chunk missing terminator [%s]" % data
                return INVALID_CHUNK

            self.nb_parsed += len(body_part) + 2

            # maybe decompress
            if self.__decompress_obj is not None:
                body_part = self.__decompress_obj.decompress(body_part)

            self._have_body = True
            self._body += body_part
            self._buf = rest[2:]
            return len(body_part)

    def _parse_chunk_size(self, data):
        idx = data.find("\r\n")
        if idx < 0:
            return None, None
        line, rest_chunk = data[:idx], data[idx + 2:]
        chunk_size = line.split(";", 1)[0].strip()
        try:
            chunk_size = int(chunk_size, 16)
        except ValueError:
            raise InvalidChunkSize(chunk_size)

        self.nb_parsed += idx + 2
        return chunk_size, rest_chunk
Exemple #3
0
class HttpParser(object):

    def __init__(self, kind=2, decompress=False):
        self.kind = kind
        self.decompress = decompress

        # errors vars
        self.errno = None
        self.errstr = ""

        # protected variables
        self._buf = []
        self._version = None
        self._method = None
        self._status_code = None
        self._status = None
        self._reason = None
        self._url = None
        self._path = None
        self._query_string = None
        self._fragment= None
        self._headers = IOrderedDict()
        self._environ = dict()
        self._chunked = False
        self._body = []
        self._trailers = None
        self._partial_body = False
        self._clen = None
        self._clen_rest = None

        # private events
        self.__on_firstline = False
        self.__on_headers_complete = False
        self.__on_message_begin = False
        self.__on_message_complete = False

        self.__decompress_obj = None

    def get_version(self):
        return self._version

    def get_method(self):
        return self._method

    def get_status_code(self):
        return self._status_code

    def get_url(self):
        return self._url

    def get_path(self):
        return self._path

    def get_query_string(self):
        return self._query_string

    def get_fragment(self):
        return self._fragment

    def get_headers(self):
        return self._headers

    def get_wsgi_environ(self):
        if not self.__on_headers_complete:
            return None

        environ = self._environ.copy()
        # clean special keys
        for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"):
            hkey = "HTTP_%s" % key
            if hkey in environ:
                environ[key] = environ.pop(hkey)

        script_name = environ.get('SCRIPT_NAME',
                os.environ.get("SCRIPT_NAME", ""))
        if script_name:
            path_info = self._path.split(script_name, 1)[1]
            environ.update({
                "PATH_INFO": unquote(path_info),
                "SCRIPT_NAME": script_name})
        else:
            environ['SCRIPT_NAME'] = ""

        if environ.get('HTTP_X_FORWARDED_PROTOCOL', '').lower() == "ssl":
            environ['wsgi.url_scheme'] = "https"
        elif environ.get('HTTP_X_FORWARDED_SSL', '').lower() == "on":
            environ['wsgi.url_scheme'] = "https"
        else:
            environ['wsgi.url_scheme'] = "http"

        return environ

    def recv_body(self):
        """ return last chunk of the parsed body"""
        body = b("").join(self._body)
        self._body = []
        self._partial_body = False
        return body

    def recv_body_into(self, barray):
        """ Receive the last chunk of the parsed bodyand store the data
        in a buffer rather than creating a new string. """
        l = len(barray)
        body = b("").join(self._body)
        m = min(len(body), l)
        data, rest = body[:m], body[m:]
        barray[0:m] = data
        if not rest:
            self._body = []
            self._partial_body = False
        else:
            self._body = [rest]
        return m

    def is_upgrade(self):
        """ Do we get upgrade header in the request. Useful for
        websockets """
        return self._headers.get('connection', "") == "upgrade"

    def is_headers_complete(self):
        """ return True if all headers have been parsed. """
        return self.__on_headers_complete

    def is_partial_body(self):
        """ return True if a chunk of body have been parsed """
        return self._partial_body

    def is_message_begin(self):
        """ return True if the parsing start """
        return self.__on_message_begin

    def is_message_complete(self):
        """ return True if the parsing is done (we get EOF) """
        return self.__on_message_complete

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        return self._chunked

    def should_keep_alive(self):
        """ return True if the connection should be kept alive
        """
        hconn = self._headers.get('connection', "").lower()
        if hconn == "close":
            return False
        elif hconn == "keep-alive":
            return True
        return self._version == (1, 1)

    def execute(self, data, length):
        # end of body can be passed manually by putting a length of 0

        if length == 0:
            self.__on_message_complete = True
            return length

        # start to parse
        nb_parsed = 0
        while True:
            if not self.__on_firstline:
                idx = data.find(b("\r\n"))
                if idx < 0:
                    self._buf.append(data)
                    return len(data)
                else:
                    self.__on_firstline = True
                    self._buf.append(data[:idx])
                    first_line = bytes_to_str(b("").join(self._buf))
                    nb_parsed = nb_parsed + idx + 2

                    rest = data[idx+2:]
                    data = b("")
                    if self._parse_firstline(first_line):
                        self._buf = [rest]
                    else:
                        return nb_parsed
            elif not self.__on_headers_complete:
                if data:
                    self._buf.append(data)
                    data = b("")

                try:
                    to_parse = b("").join(self._buf)
                    ret = self._parse_headers(to_parse)
                    if not ret:
                        return length
                    nb_parsed = nb_parsed + (len(to_parse) - ret)
                except InvalidHeader as e:
                    self.errno = INVALID_HEADER
                    self.errstr = str(e)
                    return nb_parsed
            elif not self.__on_message_complete:
                if not self.__on_message_begin:
                    self.__on_message_begin = True

                if data:
                    self._buf.append(data)
                    data = b("")

                ret = self._parse_body()
                if ret is None:
                    return length

                elif ret < 0:
                    return ret
                elif ret == 0:
                    self.__on_message_complete = True
                    return length
                else:
                    nb_parsed = max(length, ret)

            else:
                return 0

    def _parse_firstline(self, line):
        try:
            if self.kind == 2: # auto detect
                try:
                    self._parse_request_line(line)
                except InvalidRequestLine:
                    self._parse_response_line(line)
            elif self.kind == 1:
                self._parse_response_line(line)
            elif self.kind == 0:
                self._parse_request_line(line)
        except InvalidRequestLine as e:
            self.errno = BAD_FIRST_LINE
            self.errstr = str(e)
            return False
        return True

    def _parse_response_line(self, line):
        bits = line.split(None, 1)
        if len(bits) != 2:
            raise InvalidRequestLine(line)

        # version
        matchv = VERSION_RE.match(bits[0])
        if matchv is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[0])
        self._version = (int(matchv.group(1)), int(matchv.group(2)))

        # status
        matchs = STATUS_RE.match(bits[1])
        if matchs is None:
            raise InvalidRequestLine("Invalid status %" % bits[1])

        self._status = bits[1]
        self._status_code = int(matchs.group(1))
        self._reason = matchs.group(2)

    def _parse_request_line(self, line):
        bits = line.split(None, 2)
        if len(bits) != 3:
            raise InvalidRequestLine(line)

        # Method
        if not METHOD_RE.match(bits[0]):
            raise InvalidRequestLine("invalid Method: %s" % bits[0])
        self._method = bits[0].upper()

        # URI
        self._url = bits[1]
        parts = urlparse.urlsplit(bits[1])
        self._path = parts.path or ""
        self._query_string = parts.query or ""
        self._fragment = parts.fragment or ""

        # Version
        match = VERSION_RE.match(bits[2])
        if match is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[2])
        self._version = (int(match.group(1)), int(match.group(2)))

        # update environ
        if hasattr(self,'environ'):
            self._environ.update({
                "PATH_INFO": self._path,
                "QUERY_STRING": self._query_string,
                "RAW_URI": self._url,
                "REQUEST_METHOD": self._method,
                "SERVER_PROTOCOL": bits[2]})

    def _parse_headers(self, data):
        idx = data.find(b("\r\n\r\n"))
        if idx < 0: # we don't have all headers
            return False

        # Split lines on \r\n keeping the \r\n on each line
        lines = [bytes_to_str(line) + "\r\n" for line in
                data[:idx].split(b("\r\n"))]

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
        while len(lines):
            # Parse initial header name : value pair.
            curr = lines.pop(0)
            if curr.find(":") < 0:
                raise InvalidHeader("invalid line %s" % curr.strip())
            name, value = curr.split(":", 1)
            name = name.rstrip(" \t").upper()
            if HEADER_RE.search(name):
                raise InvalidHeader("invalid header name %s" % name)
            name, value = name.strip(), [value.lstrip()]

            # Consume value continuation lines
            while len(lines) and lines[0].startswith((" ", "\t")):
                value.append(lines.pop(0))
            value = ''.join(value).rstrip()

            # multiple headers
            if name in self._headers:
                value = "%s, %s" % (self._headers[name], value)

            # store new header value
            self._headers[name] = value

            # update WSGI environ
            key =  'HTTP_%s' % name.upper().replace('-','_')
            self._environ[key] = value

        # detect now if body is sent by chunks.
        clen = self._headers.get('content-length')
        te = self._headers.get('transfer-encoding', '').lower()

        if clen is not None:
            try:
                self._clen_rest = self._clen = int(clen)
            except ValueError:
                pass
        else:
            self._chunked = (te == 'chunked')
            if not self._chunked:
                self._clen_rest = MAXSIZE

        # detect encoding and set decompress object
        encoding = self._headers.get('content-encoding')
        if self.decompress:
            if encoding == "gzip":
                self.__decompress_obj = zlib.decompressobj(16+zlib.MAX_WBITS)
            elif encoding == "deflate":
                self.__decompress_obj = zlib.decompressobj()

        rest = data[idx+4:]
        self._buf = [rest]
        self.__on_headers_complete = True
        return len(rest)

    def _parse_body(self):
        if not self._chunked:
            body_part = b("").join(self._buf)
            self._clen_rest -= len(body_part)

            # maybe decompress
            if self.__decompress_obj is not None:
                body_part = self.__decompress_obj.decompress(body_part)

            self._partial_body = True
            self._body.append(body_part)
            self._buf = []

            if self._clen_rest <= 0:
                self.__on_message_complete = True
            return
        else:
            data = b("").join(self._buf)
            try:

                size, rest = self._parse_chunk_size(data)
            except InvalidChunkSize as e:
                self.errno = INVALID_CHUNK
                self.errstr = "invalid chunk size [%s]" % str(e)
                return -1

            if size == 0:
                return size

            if size is None or len(rest) < size:
                return None


            body_part, rest = rest[:size], rest[size:]
            if len(rest) < 2:
                self.errno = INVALID_CHUNK
                self.errstr = "chunk missing terminator [%s]" % data
                return -1

            # maybe decompress
            if self.__decompress_obj is not None:
                body_part = self.__decompress_obj.decompress(body_part)

            self._partial_body = True
            self._body.append(body_part)

            self._buf = [rest[2:]]
            return len(rest)

    def _parse_chunk_size(self, data):
        idx = data.find(b("\r\n"))
        if idx < 0:
            return None, None
        line, rest_chunk = data[:idx], data[idx+2:]
        chunk_size = line.split(b(";"), 1)[0].strip()
        try:
            chunk_size = int(chunk_size, 16)
        except ValueError:
            raise InvalidChunkSize(chunk_size)

        if chunk_size == 0:
            self._parse_trailers(rest_chunk)
            return 0, None
        return chunk_size, rest_chunk

    def _parse_trailers(self, data):
        idx = data.find(b("\r\n\r\n"))

        if data[:2] == b("\r\n"):
            self._trailers = self._parse_headers(data[:idx])