def __init__(self, kind=2, decompress=False):
    # NOTE(review): this is a top-level def, outside any class — it looks
    # like a leftover duplicate of HttpParser.__init__ below; confirm it is
    # still referenced before removing.
    #
    # kind: 0 = request parser, 1 = response parser, 2 = auto-detect.
    # decompress: when True, transparently inflate gzip/deflate bodies.
    self.kind = kind
    self.decompress = decompress

    # error reporting
    self.errno = None
    self.errstr = ""

    # parsed request/response state (filled in as parsing progresses)
    self._buf = []
    self._version = None
    self._method = None
    self._status_code = None
    self._status = None
    self._reason = None
    self._url = None
    self._path = None
    self._query_string = None
    self._fragment = None
    self._headers = IOrderedDict()
    self._environ = dict()
    self._chunked = False
    self._body = []
    self._trailers = None
    self._partial_body = False
    self._clen = None
    self._clen_rest = None

    # parsing-progress events
    self.__on_firstline = False
    self.__on_headers_complete = False
    self.__on_message_begin = False
    self.__on_message_complete = False
    self.__decompress_obj = None
class HttpParser(object):
    """Incremental HTTP request/response parser (str-based variant).

    Feed raw data to ``execute``; inspect parsed state through the
    accessors.  ``kind`` selects what to parse (0 = request, 1 = response,
    2 = auto-detect); ``decompress`` enables transparent gzip/deflate
    body decompression.
    """

    def __init__(self, kind=2, decompress=False):
        self.kind = kind
        self.decompress = decompress

        # errors vars
        self.errno = None
        self.errstr = ""

        # protected variables
        # containers
        self._buf = ""
        self._body = ""
        self._environ = dict()
        # assumes IOrderedDict is a case-insensitive ordered mapping --
        # lookups below use lowercase keys while stores use uppercase
        self._headers = IOrderedDict()

        # variables
        self._version = None
        self._method = None
        self._status_code = None
        self._status = None
        self._reason = None
        self._url = None
        self._path = None
        self._query_string = None
        self._fragment = None

        # flags
        self._chunked = False
        self._have_trailer = False
        self._have_body = False
        self.nb_parsed = 0
        self._clen = 0

        # private events
        self.__on_firstline = False
        self.__on_headers_complete = False
        self.__on_message_begin = False
        self.__on_message_complete = False
        self.__decompress_obj = None
        self.__decompress_first_try = True

    def get_version(self):
        return self._version

    def get_method(self):
        return self._method

    def get_status_code(self):
        return self._status_code

    def get_reason(self):
        return self._reason

    def get_url(self):
        return self._url

    def get_path(self):
        return self._path

    def get_query_string(self):
        return self._query_string

    def get_fragment(self):
        return self._fragment

    def get_headers(self):
        return self._headers

    def get_wsgi_environ(self):
        """Return a WSGI environ dict built from the parsed headers,
        or None if headers are not complete yet."""
        if not self.__on_headers_complete:
            return None

        environ = self._environ.copy()

        # promote the HTTP_* forms of the special WSGI keys
        for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"):
            hkey = "HTTP_%s" % key
            if hkey in environ:
                environ[key] = environ.pop(hkey)

        script_name = environ.get('SCRIPT_NAME',
                os.environ.get("SCRIPT_NAME", ""))
        if script_name:
            path_info = self._path.split(script_name, 1)[1]
            environ.update({
                "PATH_INFO": unquote(path_info),
                "SCRIPT_NAME": script_name})
        else:
            environ['SCRIPT_NAME'] = ""

        # honor proxy headers advertising TLS termination upstream
        if environ.get('HTTP_X_FORWARDED_PROTOCOL', '').lower() == "ssl":
            environ['wsgi.url_scheme'] = "https"
        elif environ.get('HTTP_X_FORWARDED_SSL', '').lower() == "on":
            environ['wsgi.url_scheme'] = "https"
        else:
            environ['wsgi.url_scheme'] = "http"

        return environ

    def get_body(self):
        """ return last chunk of the parsed body"""
        return self._body

    def is_upgrade(self):
        """ Do we get upgrade header in the request. Useful for
        websockets """
        return self._headers.get('connection', "").lower() == "upgrade"

    def is_headers_complete(self):
        """ return True if all headers have been parsed. """
        return self.__on_headers_complete

    def is_partial_body(self):
        """ return True if a chunk of body have been parsed """
        return self._have_body

    def is_message_begin(self):
        """ return True if the parsing start """
        return self.__on_message_begin

    def is_message_complete(self):
        """ return True if the parsing is done (we get EOF) """
        return self.__on_message_complete

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        return self._chunked

    def should_keep_alive(self):
        """ return True if the connection should be kept alive """
        hconn = self._headers.get('connection', "").lower()
        if hconn == "close":
            return False
        elif hconn == "keep-alive":
            return True
        # HTTP/1.1 defaults to keep-alive
        return self._version == (1, 1)

    def execute(self, data):
        """Feed ``data`` to the parser.

        Returns the number of bytes parsed so far, or a negative error
        constant (BAD_FIRST_LINE / INVALID_HEADER / ...) on failure.
        Passing empty data marks the message complete (manual EOF).
        """
        # end of body can be passed manually by putting a length of 0
        if len(data) == 0:
            self.__on_message_complete = True
            return 0

        # start to parse
        while True:
            if not self.__on_firstline:
                # try to extract first line
                idx = data.find("\r\n")
                if idx < 0:
                    self.errno = BAD_FIRST_LINE
                    self.errstr = "Invalid HTTP request/status line"
                    return BAD_FIRST_LINE
                else:
                    self.__on_firstline = True
                    first_line, self._buf = data[:idx], data[idx + 2:]
                    self.nb_parsed += idx + 2
                    data = ""
                    if not self._parse_firstline(first_line):
                        return BAD_FIRST_LINE
            elif not self.__on_headers_complete:
                try:
                    to_parse = self._buf
                    ret = self._parse_headers(to_parse)
                    self.__on_headers_complete = True
                    # _parse_headers returns the length of the leftover
                    # buffer, so the consumed count is the difference
                    self.nb_parsed += len(to_parse) - ret
                except InvalidHeader as e:
                    self.errno = INVALID_HEADER
                    self.errstr = str(e)
                    return INVALID_HEADER
            elif not self.__on_message_complete:
                if not self.__on_message_begin:
                    self.__on_message_begin = True
                to_parse = self._buf
                ret = self._parse_body(to_parse)
                # this is a normal request
                if ret is None:
                    self.__on_message_complete = True
                elif ret == 0:
                    # finished parsing
                    self.__on_message_complete = True
                elif ret < 0:
                    # on error
                    return ret
                elif ret > 0:
                    # parsed a chunk, do nothing
                    pass
            else:
                return self.nb_parsed

    def _parse_firstline(self, line):
        """Parse the request/status line; returns False and records the
        error on failure."""
        try:
            if self.kind == 2:  # auto detect
                try:
                    self._parse_request_line(line)
                except InvalidRequestLine:
                    self._parse_response_line(line)
            elif self.kind == 1:
                self._parse_response_line(line)
            elif self.kind == 0:
                self._parse_request_line(line)
        except InvalidRequestLine as e:
            self.errno = BAD_FIRST_LINE
            self.errstr = str(e)
            return False
        return True

    def _parse_response_line(self, line):
        bits = line.split(None, 1)
        if len(bits) != 2:
            raise InvalidRequestLine(line)

        # version
        matchv = VERSION_RE.match(bits[0])
        if matchv is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[0])
        self._version = (int(matchv.group(1)), int(matchv.group(2)))

        # status
        matchs = STATUS_RE.match(bits[1])
        if matchs is None:
            # BUGFIX: original used "Invalid status %" % bits[1], a broken
            # format string that raised ValueError instead of this exception
            raise InvalidRequestLine("Invalid status %s" % bits[1])

        self._status = bits[1]
        self._status_code = int(matchs.group(1))
        self._reason = matchs.group(2)

    def _parse_request_line(self, line):
        bits = line.split(None, 2)
        if len(bits) != 3:
            raise InvalidRequestLine(line)

        # Method
        if not METHOD_RE.match(bits[0]):
            raise InvalidRequestLine("invalid Method: %s" % bits[0])
        self._method = bits[0].upper()

        # URI
        self._url = bits[1]
        parts = urlparse.urlsplit(bits[1])
        self._path = parts.path or ""
        self._query_string = parts.query or ""
        self._fragment = parts.fragment or ""

        # Version
        match = VERSION_RE.match(bits[2])
        if match is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[2])
        self._version = (int(match.group(1)), int(match.group(2)))

        # update environ
        # NOTE(review): this tests for an 'environ' attribute while the
        # instance stores '_environ' -- so the update never runs; kept
        # as-is since get_wsgi_environ builds from _environ anyway.
        if hasattr(self, 'environ'):
            self._environ.update({
                "PATH_INFO": self._path,
                "QUERY_STRING": self._query_string,
                "RAW_URI": self._url,
                "REQUEST_METHOD": self._method,
                "SERVER_PROTOCOL": bits[2]})

    def _parse_headers(self, data):
        """Parse the full header block; raises InvalidHeader if the block
        is incomplete or malformed.  Returns the length of the unparsed
        remainder (stashed in self._buf)."""
        idx = data.find("\r\n\r\n")
        if idx < 0:  # we don't have all headers
            raise InvalidHeader('Headers not complete')

        # Split lines on \r\n keeping the \r\n on each line
        lines = [line + "\r\n" for line in data[:idx].split("\r\n")]

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
        while len(lines):
            # Parse initial header name : value pair.
            curr = lines.pop(0)
            if curr.find(":") < 0:
                raise InvalidHeader("invalid line %s" % curr.strip())
            name, value = curr.split(":", 1)
            name = name.rstrip(" \t").upper()
            if HEADER_RE.search(name):
                raise InvalidHeader("invalid header name %s" % name)
            if value.endswith("\r\n"):
                value = value[:-2]
            name, value = name.strip(), [value.lstrip()]

            # Consume value continuation lines
            while len(lines) and lines[0].startswith((" ", "\t")):
                curr = lines.pop(0)
                if curr.endswith("\r\n"):
                    curr = curr[:-2]
                value.append(curr)
            value = ''.join(value).rstrip()

            # multiple headers
            if name in self._headers:
                value = "%s, %s" % (self._headers[name], value)

            # store new header value
            self._headers[name] = value

            # update WSGI environ
            key = 'HTTP_%s' % name.upper().replace('-', '_')
            self._environ[key] = value

        # detect now if body is sent by chunks.
        clen = self._headers.get('content-length')
        te = self._headers.get('transfer-encoding', '').lower()

        if self._headers.get('Trailer'):
            self._have_trailer = True

        if clen is not None:
            try:
                self._clen = int(clen)
            except ValueError:
                pass
        else:
            self._chunked = (te == 'chunked')
            if not self._chunked:
                self._clen_rest = MAXSIZE

        # detect encoding and set decompress object
        encoding = self._headers.get('content-encoding')
        if self.decompress:
            if encoding == "gzip":
                # wbits 16+MAX_WBITS: expect a gzip wrapper
                self.__decompress_obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
                self.__decompress_first_try = False
            elif encoding == "deflate":
                self.__decompress_obj = zlib.decompressobj()

        rest = data[idx + 4:]
        self._buf = rest
        return len(rest)

    def _parse_body(self, data):
        if not self._chunked:
            complete = True
            self.__on_message_complete = True

            # if we have enough data
            if len(data) < self._clen:
                complete = False
                self._clen = len(data)
                self.errno = INVALID_BODY
                self.errstr = "HTTP body incomplete"
            body_part = data[:self._clen]
            self.nb_parsed += len(body_part)

            # maybe decompress
            if complete and body_part and self.__decompress_obj is not None:
                if not self.__decompress_first_try:
                    body_part = self.__decompress_obj.decompress(body_part)
                else:
                    # some servers send raw deflate streams without the
                    # zlib header; retry with a raw-window decompressor
                    try:
                        body_part = self.__decompress_obj.decompress(body_part)
                    except zlib.error:
                        # BUGFIX: original set a stray 'decompressobj'
                        # attribute on the old object instead of replacing it
                        self.__decompress_obj = zlib.decompressobj(
                                -zlib.MAX_WBITS)
                        body_part = self.__decompress_obj.decompress(body_part)
                    self.__decompress_first_try = False

            if body_part:
                self._have_body = True
            self._body = body_part
            # we parsed enough content for this response
            # so just ignore the other data
            self._buf = ""
            return
        else:
            try:
                size, rest = self._parse_chunk_size(data)
            except InvalidChunkSize as e:
                self.errno = INVALID_CHUNK
                self.errstr = "invalid chunk size [%s]" % str(e)
                return INVALID_CHUNK

            if size == 0:
                # last chunk: either a trailer section or the final CRLF
                def error_trailer():
                    self.errno = INVALID_TRAILER
                    self.errstr = "Invalid trailer"
                    return INVALID_TRAILER

                # if this response have trailer
                if self._have_trailer and rest[:2] != "\r\n":
                    idx = rest.find("\r\n\r\n")
                    if idx != -1:
                        try:
                            self._parse_headers(rest[:idx + 4])
                            self.nb_parsed += idx + 4
                            return 0
                        except InvalidHeader:
                            return error_trailer()
                    else:
                        return error_trailer()

                if rest[:2] == '\r\n':
                    self.nb_parsed += 2
                    self._buf = ""
                    return 0
                else:
                    return error_trailer()

            if size is None or len(rest) < size:
                self.errno = INVALID_CHUNK
                self.errstr = "Invalid chunk size"
                return INVALID_CHUNK

            body_part, rest = rest[:size], rest[size:]
            if len(rest) < 2:
                self.errno = INVALID_CHUNK
                self.errstr = "chunk missing terminator [%s]" % data
                return INVALID_CHUNK
            self.nb_parsed += len(body_part) + 2

            # maybe decompress
            if self.__decompress_obj is not None:
                body_part = self.__decompress_obj.decompress(body_part)

            self._have_body = True
            self._body += body_part
            self._buf = rest[2:]
            return len(body_part)

    def _parse_chunk_size(self, data):
        """Return (chunk_size, rest) or (None, None) when the size line is
        incomplete; raises InvalidChunkSize on a malformed size."""
        idx = data.find("\r\n")
        if idx < 0:
            return None, None
        line, rest_chunk = data[:idx], data[idx + 2:]
        # drop any chunk extension after ';'
        chunk_size = line.split(";", 1)[0].strip()
        try:
            chunk_size = int(chunk_size, 16)
        except ValueError:
            raise InvalidChunkSize(chunk_size)
        self.nb_parsed += idx + 2
        return chunk_size, rest_chunk
class HttpParser(object):
    """Incremental HTTP request/response parser (bytes-based variant).

    Feed raw bytes to ``execute``; retrieve the parsed body with
    ``recv_body``/``recv_body_into``.  ``kind`` selects what to parse
    (0 = request, 1 = response, 2 = auto-detect); ``decompress`` enables
    transparent gzip/deflate body decompression.
    """

    def __init__(self, kind=2, decompress=False):
        self.kind = kind
        self.decompress = decompress

        # errors vars
        self.errno = None
        self.errstr = ""

        # protected variables
        self._buf = []
        self._version = None
        self._method = None
        self._status_code = None
        self._status = None
        self._reason = None
        self._url = None
        self._path = None
        self._query_string = None
        self._fragment = None
        # assumes IOrderedDict is a case-insensitive ordered mapping --
        # lookups below use lowercase keys while stores use uppercase
        self._headers = IOrderedDict()
        self._environ = dict()
        self._chunked = False
        self._body = []
        self._trailers = None
        self._partial_body = False
        self._clen = None
        self._clen_rest = None

        # private events
        self.__on_firstline = False
        self.__on_headers_complete = False
        self.__on_message_begin = False
        self.__on_message_complete = False
        self.__decompress_obj = None

    def get_version(self):
        return self._version

    def get_method(self):
        return self._method

    def get_status_code(self):
        return self._status_code

    def get_reason(self):
        # added for consistency with the str-based parser: _reason is
        # filled by _parse_response_line but had no accessor here
        return self._reason

    def get_url(self):
        return self._url

    def get_path(self):
        return self._path

    def get_query_string(self):
        return self._query_string

    def get_fragment(self):
        return self._fragment

    def get_headers(self):
        return self._headers

    def get_wsgi_environ(self):
        """Return a WSGI environ dict built from the parsed headers,
        or None if headers are not complete yet."""
        if not self.__on_headers_complete:
            return None

        environ = self._environ.copy()

        # promote the HTTP_* forms of the special WSGI keys
        for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"):
            hkey = "HTTP_%s" % key
            if hkey in environ:
                environ[key] = environ.pop(hkey)

        script_name = environ.get('SCRIPT_NAME',
                os.environ.get("SCRIPT_NAME", ""))
        if script_name:
            path_info = self._path.split(script_name, 1)[1]
            environ.update({
                "PATH_INFO": unquote(path_info),
                "SCRIPT_NAME": script_name})
        else:
            environ['SCRIPT_NAME'] = ""

        # honor proxy headers advertising TLS termination upstream
        if environ.get('HTTP_X_FORWARDED_PROTOCOL', '').lower() == "ssl":
            environ['wsgi.url_scheme'] = "https"
        elif environ.get('HTTP_X_FORWARDED_SSL', '').lower() == "on":
            environ['wsgi.url_scheme'] = "https"
        else:
            environ['wsgi.url_scheme'] = "http"

        return environ

    def recv_body(self):
        """ return last chunk of the parsed body"""
        body = b("").join(self._body)
        self._body = []
        self._partial_body = False
        return body

    def recv_body_into(self, barray):
        """ Receive the last chunk of the parsed body and store the data
        in a buffer rather than creating a new string. """
        l = len(barray)
        body = b("").join(self._body)
        m = min(len(body), l)
        data, rest = body[:m], body[m:]
        barray[0:m] = data
        if not rest:
            self._body = []
            self._partial_body = False
        else:
            # keep whatever didn't fit for the next call
            self._body = [rest]
        return m

    def is_upgrade(self):
        """ Do we get upgrade header in the request. Useful for
        websockets """
        # BUGFIX: compare case-insensitively ("Connection: Upgrade" is the
        # common capitalization); matches the str-based parser's behavior
        return self._headers.get('connection', "").lower() == "upgrade"

    def is_headers_complete(self):
        """ return True if all headers have been parsed. """
        return self.__on_headers_complete

    def is_partial_body(self):
        """ return True if a chunk of body have been parsed """
        return self._partial_body

    def is_message_begin(self):
        """ return True if the parsing start """
        return self.__on_message_begin

    def is_message_complete(self):
        """ return True if the parsing is done (we get EOF) """
        return self.__on_message_complete

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        return self._chunked

    def should_keep_alive(self):
        """ return True if the connection should be kept alive """
        hconn = self._headers.get('connection', "").lower()
        if hconn == "close":
            return False
        elif hconn == "keep-alive":
            return True
        # HTTP/1.1 defaults to keep-alive
        return self._version == (1, 1)

    def execute(self, data, length):
        """Feed ``length`` bytes of ``data`` to the parser.

        Returns the number of bytes consumed, or a negative value on
        error.  Passing length == 0 marks the message complete (EOF).
        """
        # end of body can be passed manually by putting a length of 0
        if length == 0:
            self.__on_message_complete = True
            return length

        # start to parse
        nb_parsed = 0
        while True:
            if not self.__on_firstline:
                idx = data.find(b("\r\n"))
                if idx < 0:
                    # first line incomplete: buffer and wait for more data
                    self._buf.append(data)
                    return len(data)
                else:
                    self.__on_firstline = True
                    self._buf.append(data[:idx])
                    first_line = bytes_to_str(b("").join(self._buf))
                    nb_parsed = nb_parsed + idx + 2

                    rest = data[idx + 2:]
                    data = b("")
                    if self._parse_firstline(first_line):
                        self._buf = [rest]
                    else:
                        return nb_parsed
            elif not self.__on_headers_complete:
                if data:
                    self._buf.append(data)
                    data = b("")
                try:
                    to_parse = b("").join(self._buf)
                    ret = self._parse_headers(to_parse)
                    if not ret:
                        # headers incomplete, everything buffered
                        return length
                    nb_parsed = nb_parsed + (len(to_parse) - ret)
                except InvalidHeader as e:
                    self.errno = INVALID_HEADER
                    self.errstr = str(e)
                    return nb_parsed
            elif not self.__on_message_complete:
                if not self.__on_message_begin:
                    self.__on_message_begin = True
                if data:
                    self._buf.append(data)
                    data = b("")

                ret = self._parse_body()
                if ret is None:
                    # need more data
                    return length
                elif ret < 0:
                    # on error
                    return ret
                elif ret == 0:
                    self.__on_message_complete = True
                    return length
                else:
                    nb_parsed = max(length, ret)
            else:
                return 0

    def _parse_firstline(self, line):
        """Parse the request/status line; returns False and records the
        error on failure."""
        try:
            if self.kind == 2:  # auto detect
                try:
                    self._parse_request_line(line)
                except InvalidRequestLine:
                    self._parse_response_line(line)
            elif self.kind == 1:
                self._parse_response_line(line)
            elif self.kind == 0:
                self._parse_request_line(line)
        except InvalidRequestLine as e:
            self.errno = BAD_FIRST_LINE
            self.errstr = str(e)
            return False
        return True

    def _parse_response_line(self, line):
        bits = line.split(None, 1)
        if len(bits) != 2:
            raise InvalidRequestLine(line)

        # version
        matchv = VERSION_RE.match(bits[0])
        if matchv is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[0])
        self._version = (int(matchv.group(1)), int(matchv.group(2)))

        # status
        matchs = STATUS_RE.match(bits[1])
        if matchs is None:
            # BUGFIX: original used "Invalid status %" % bits[1], a broken
            # format string that raised ValueError instead of this exception
            raise InvalidRequestLine("Invalid status %s" % bits[1])

        self._status = bits[1]
        self._status_code = int(matchs.group(1))
        self._reason = matchs.group(2)

    def _parse_request_line(self, line):
        bits = line.split(None, 2)
        if len(bits) != 3:
            raise InvalidRequestLine(line)

        # Method
        if not METHOD_RE.match(bits[0]):
            raise InvalidRequestLine("invalid Method: %s" % bits[0])
        self._method = bits[0].upper()

        # URI
        self._url = bits[1]
        parts = urlparse.urlsplit(bits[1])
        self._path = parts.path or ""
        self._query_string = parts.query or ""
        self._fragment = parts.fragment or ""

        # Version
        match = VERSION_RE.match(bits[2])
        if match is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[2])
        self._version = (int(match.group(1)), int(match.group(2)))

        # update environ
        # NOTE(review): this tests for an 'environ' attribute while the
        # instance stores '_environ' -- so the update never runs; kept
        # as-is since get_wsgi_environ builds from _environ anyway.
        if hasattr(self, 'environ'):
            self._environ.update({
                "PATH_INFO": self._path,
                "QUERY_STRING": self._query_string,
                "RAW_URI": self._url,
                "REQUEST_METHOD": self._method,
                "SERVER_PROTOCOL": bits[2]})

    def _parse_headers(self, data):
        """Parse the full header block.  Returns False when the block is
        incomplete; otherwise stores the headers, flags completion and
        returns the length of the unparsed remainder (kept in self._buf)."""
        idx = data.find(b("\r\n\r\n"))
        if idx < 0:  # we don't have all headers
            return False

        # Split lines on \r\n keeping the \r\n on each line
        lines = [bytes_to_str(line) + "\r\n"
                for line in data[:idx].split(b("\r\n"))]

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
        while len(lines):
            # Parse initial header name : value pair.
            curr = lines.pop(0)
            if curr.find(":") < 0:
                raise InvalidHeader("invalid line %s" % curr.strip())
            name, value = curr.split(":", 1)
            name = name.rstrip(" \t").upper()
            if HEADER_RE.search(name):
                raise InvalidHeader("invalid header name %s" % name)
            name, value = name.strip(), [value.lstrip()]

            # Consume value continuation lines
            while len(lines) and lines[0].startswith((" ", "\t")):
                value.append(lines.pop(0))
            value = ''.join(value).rstrip()

            # multiple headers
            if name in self._headers:
                value = "%s, %s" % (self._headers[name], value)

            # store new header value
            self._headers[name] = value

            # update WSGI environ
            key = 'HTTP_%s' % name.upper().replace('-', '_')
            self._environ[key] = value

        # detect now if body is sent by chunks.
        clen = self._headers.get('content-length')
        te = self._headers.get('transfer-encoding', '').lower()

        if clen is not None:
            try:
                self._clen_rest = self._clen = int(clen)
            except ValueError:
                pass
        else:
            self._chunked = (te == 'chunked')
            if not self._chunked:
                # no length and not chunked: read until EOF
                self._clen_rest = MAXSIZE

        # detect encoding and set decompress object
        encoding = self._headers.get('content-encoding')
        if self.decompress:
            if encoding == "gzip":
                # wbits 16+MAX_WBITS: expect a gzip wrapper
                self.__decompress_obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
            elif encoding == "deflate":
                self.__decompress_obj = zlib.decompressobj()

        rest = data[idx + 4:]
        self._buf = [rest]
        self.__on_headers_complete = True
        return len(rest)

    def _parse_body(self):
        if not self._chunked:
            body_part = b("").join(self._buf)
            self._clen_rest -= len(body_part)

            # maybe decompress
            if self.__decompress_obj is not None:
                body_part = self.__decompress_obj.decompress(body_part)

            self._partial_body = True
            self._body.append(body_part)
            self._buf = []

            if self._clen_rest <= 0:
                self.__on_message_complete = True
            return
        else:
            data = b("").join(self._buf)
            try:
                size, rest = self._parse_chunk_size(data)
            except InvalidChunkSize as e:
                self.errno = INVALID_CHUNK
                self.errstr = "invalid chunk size [%s]" % str(e)
                return -1

            if size == 0:
                # last chunk reached (trailers handled in _parse_chunk_size)
                return size

            if size is None or len(rest) < size:
                # need more data to complete this chunk
                return None

            body_part, rest = rest[:size], rest[size:]
            if len(rest) < 2:
                self.errno = INVALID_CHUNK
                self.errstr = "chunk missing terminator [%s]" % data
                return -1

            # maybe decompress
            if self.__decompress_obj is not None:
                body_part = self.__decompress_obj.decompress(body_part)

            self._partial_body = True
            self._body.append(body_part)
            self._buf = [rest[2:]]
            return len(rest)

    def _parse_chunk_size(self, data):
        """Return (chunk_size, rest) or (None, None) when the size line is
        incomplete; raises InvalidChunkSize on a malformed size."""
        idx = data.find(b("\r\n"))
        if idx < 0:
            return None, None
        line, rest_chunk = data[:idx], data[idx + 2:]
        # drop any chunk extension after ';'
        chunk_size = line.split(b(";"), 1)[0].strip()
        try:
            chunk_size = int(chunk_size, 16)
        except ValueError:
            raise InvalidChunkSize(chunk_size)
        if chunk_size == 0:
            self._parse_trailers(rest_chunk)
            return 0, None
        return chunk_size, rest_chunk

    def _parse_trailers(self, data):
        """Parse an optional trailer section after the last chunk."""
        idx = data.find(b("\r\n\r\n"))

        if data[:2] == b("\r\n"):
            self._trailers = self._parse_headers(data[:idx])