def main():
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(('gunicorn.org', 80))
        s.send(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n"))

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                print(p.get_headers())
                print(p.get_headers()['content-length'])
                header_done = True

            if p.is_partial_body():
                body.append(p.recv_body())

            if p.is_message_complete():
                break

        print(b("").join(body))
    finally:
        s.close()
def main():
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(('gunicorn.org', 80))
        s.send(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n"))

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                print(p.get_headers())
                print(p.get_headers()['content-length'])
                print(p.get_method())
                header_done = True

            if p.is_partial_body():
                body.append(p.recv_body())

            if p.is_message_complete():
                break

        print(b("").join(body))
    finally:
        s.close()
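# A minimal offline sketch (not part of the library): the same
# execute()/is_headers_complete()/recv_body() loop as above, driven by a
# canned response instead of a socket.  The imports below are shared by the
# later demo_* sketches and assume the pure-Python parser from
# https://github.com/benoitc/http-parser.
from http_parser.pyparser import HttpParser
from http_parser.util import b

def demo_parse_canned_response():
    p = HttpParser()
    raw = b("HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nhello")
    nparsed = p.execute(raw, len(raw))
    assert nparsed == len(raw)
    assert p.is_headers_complete()
    print(p.get_headers()['content-length'])   # '5'
    if p.is_partial_body():
        print(p.recv_body())                   # b'hello'
    assert p.is_message_complete()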
def _parse_body(self):
    if self._status_code == 204 and len(self._buf) == 0:
        self.__on_message_complete = True
        return
    elif not self._chunked:
        body_part = b("").join(self._buf)
        self._clen_rest -= len(body_part)

        # maybe decompress
        if self.__decompress_obj is not None:
            if not self.__decompress_first_try:
                body_part = self.__decompress_obj.decompress(body_part)
            else:
                # first block: retry as a raw deflate stream if the
                # zlib-wrapped attempt fails
                try:
                    body_part = self.__decompress_obj.decompress(body_part)
                except zlib.error:
                    self.__decompress_obj = zlib.decompressobj(
                            -zlib.MAX_WBITS)
                    body_part = self.__decompress_obj.decompress(body_part)
                self.__decompress_first_try = False

        self._partial_body = True
        self._body.append(body_part)
        self._buf = []

        if self._clen_rest <= 0:
            self.__on_message_complete = True
        return
    else:
        data = b("").join(self._buf)
        try:
            size, rest = self._parse_chunk_size(data)
        except InvalidChunkSize as e:
            self.errno = INVALID_CHUNK
            self.errstr = "invalid chunk size [%s]" % str(e)
            return -1

        if size == 0:
            return size

        if size is None or len(rest) < size + 2:
            # wait till terminator
            return None

        body_part, rest = rest[:size], rest[size:]
        if len(rest) < 2:
            self.errno = INVALID_CHUNK
            self.errstr = "chunk missing terminator [%s]" % data
            return -1

        # maybe decompress
        if self.__decompress_obj is not None:
            body_part = self.__decompress_obj.decompress(body_part)

        self._partial_body = True
        self._body.append(body_part)
        self._buf = [rest[2:]]

        return len(rest)
def _parse_body(self):
    if not self._chunked:
        body_part = b("").join(self._buf)
        self._clen_rest -= len(body_part)

        # maybe decompress
        if self.__decompress_obj is not None:
            if not self.__decompress_first_try:
                body_part = self.__decompress_obj.decompress(body_part)
            else:
                # first block: retry as a raw deflate stream if the
                # zlib-wrapped attempt fails
                try:
                    body_part = self.__decompress_obj.decompress(body_part)
                except zlib.error:
                    self.__decompress_obj = zlib.decompressobj(-zlib.MAX_WBITS)
                    body_part = self.__decompress_obj.decompress(body_part)
                self.__decompress_first_try = False

        self._partial_body = True
        self._body.append(body_part)
        self._buf = []

        if self._clen_rest <= 0:
            self.__on_message_complete = True
        return
    else:
        data = b("").join(self._buf)
        try:
            size, rest = self._parse_chunk_size(data)
        except InvalidChunkSize as e:
            self.errno = INVALID_CHUNK
            self.errstr = "invalid chunk size [%s]" % str(e)
            return -1

        if size == 0:
            return size

        if size is None or len(rest) < size:
            return None

        body_part, rest = rest[:size], rest[size:]
        if len(rest) < 2:
            self.errno = INVALID_CHUNK
            self.errstr = "chunk missing terminator [%s]" % data
            return -1

        # maybe decompress
        if self.__decompress_obj is not None:
            body_part = self.__decompress_obj.decompress(body_part)

        self._partial_body = True
        self._body.append(body_part)
        self._buf = [rest[2:]]

        return len(rest)
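# Hedged sketch of the decompression path above: with decompress=True and a
# deflate-encoded body, recv_body() yields the inflated bytes.  Builds the
# whole response in memory; zlib.compress() produces the zlib-wrapped stream
# that HTTP "deflate" usually means, so the first decompress attempt succeeds.
import zlib

def demo_deflate_body():
    compressed = zlib.compress(b("hello world"))
    raw = b("HTTP/1.1 200 OK\r\n"
            "Content-Encoding: deflate\r\n"
            "Content-Length: %d\r\n\r\n") % len(compressed) + compressed
    p = HttpParser(decompress=True)
    p.execute(raw, len(raw))
    print(p.recv_body())   # b'hello world'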
def _parse_chunk_size(self, data):
    idx = data.find(b("\r\n"))
    if idx < 0:
        return None, None
    line, rest_chunk = data[:idx], data[idx + 2:]
    chunk_size = line.split(b(";"), 1)[0].strip()
    try:
        chunk_size = int(chunk_size, 16)
    except ValueError:
        raise InvalidChunkSize(chunk_size)

    if chunk_size == 0:
        self._parse_trailers(rest_chunk)
        return 0, None
    return chunk_size, rest_chunk
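# Hedged, standalone illustration of the chunk-size logic above: the hex size
# line (with optional ";ext" chunk extensions) is split off the front of the
# buffered chunked body.  Mirrors _parse_chunk_size on plain bytes; not part
# of the parser itself.
def demo_chunk_size(data):
    idx = data.find(b"\r\n")
    if idx < 0:                 # size line not complete yet
        return None, None
    line, rest = data[:idx], data[idx + 2:]
    size = int(line.split(b";", 1)[0].strip(), 16)   # hex size, extensions ignored
    return size, rest

# demo_chunk_size(b"4\r\nWiki\r\n0\r\n\r\n") == (4, b"Wiki\r\n0\r\n\r\n")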
def _parse_body(self):
    if not self._chunked:
        body_part = b("").join(self._buf)
        self._clen_rest -= len(body_part)

        # maybe decompress
        if self.__decompress_obj is not None:
            body_part = self.__decompress_obj.decompress(body_part)

        self._partial_body = True
        self._body.append(body_part)
        self._buf = []

        if self._clen_rest <= 0:
            self.__on_message_complete = True
        return
    else:
        data = b("").join(self._buf)
        try:
            size, rest = self._parse_chunk_size(data)
        except InvalidChunkSize as e:
            self.errno = INVALID_CHUNK
            self.errstr = "invalid chunk size [%s]" % str(e)
            return -1

        if size == 0:
            return size

        if size is None or len(rest) < size:
            return None

        body_part, rest = rest[:size], rest[size:]
        if len(rest) < 2:
            self.errno = INVALID_CHUNK
            self.errstr = "chunk missing terminator [%s]" % data
            return -1

        # maybe decompress
        if self.__decompress_obj is not None:
            body_part = self.__decompress_obj.decompress(body_part)

        self._partial_body = True
        self._body.append(body_part)
        self._buf = [rest[2:]]

        return len(rest)
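# Hedged end-to-end sketch of the chunked path above: feed a small chunked
# response through the public API and let _parse_body()/_parse_chunk_size()
# reassemble it.  Uses the same HttpParser import as the earlier demos.
def demo_parse_chunked():
    p = HttpParser()
    raw = b("HTTP/1.1 200 OK\r\n"
            "Transfer-Encoding: chunked\r\n\r\n"
            "4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n")
    p.execute(raw, len(raw))
    assert p.is_message_complete()
    print(p.recv_body())   # b'Wikipedia'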
def main():
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(('install2.optimum-installer.com', 80))
        s.send(b("GET /o/PDFCreator/Express_Installer.exe.exe HTTP/1.1\r\n"
                 "Host: install2.optimum-installer.com\r\n\r\n"))

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                print(p.get_headers())
                print(p.get_headers()['content-length'])
                header_done = True

            if p.is_partial_body():
                # recv_body() drains the buffered chunk, so call it once
                # and reuse the result
                chunk = p.recv_body()
                body.append(chunk)
                print(chunk)
                print("BDy" + "+" * 79)

            if p.is_message_complete():
                break

        body = b("").join(body)
        print("Writing file\n")
        data_write = open("mal.exe", "wb")
        data_write.write(body)
        data_write.close()
        print("+" * 94)
    finally:
        s.close()
def main():
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect(('baike.baidu.com', 80))
        s.send(b("GET /view/262.htm HTTP/1.1\r\nHost: baike.baidu.com\r\n\r\n"))
        p = HttpStream(SocketReader(s), decompress=True)
        print(p.headers())
        print(p.body_file().read())
    finally:
        s.close()
def main():
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect(('gunicorn.org', 80))
        s.send(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n"))
        p = HttpStream(SocketReader(s))
        print(p.headers())
        print(p.body_file().read())
    finally:
        s.close()
def recv_body_into(self, barray):
    """ Receive the last chunk of the parsed body and store the data
    in a buffer rather than creating a new string. """
    l = len(barray)
    body = b("").join(self._body)
    m = min(len(body), l)
    data, rest = body[:m], body[m:]
    barray[0:m] = data
    if not rest:
        self._body = []
        self._partial_body = False
    else:
        self._body = [rest]
    return m
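# Hedged sketch of recv_body_into() usage: drain the parsed body into a
# preallocated bytearray instead of building new bytes objects.  Reuses the
# canned response from demo_parse_canned_response().
def demo_recv_body_into():
    p = HttpParser()
    raw = b("HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nhello")
    p.execute(raw, len(raw))
    buf = bytearray(4)              # deliberately smaller than the body
    n = p.recv_body_into(buf)
    print(n, bytes(buf[:n]))        # 4 b'hell'; b'o' stays queued
    n = p.recv_body_into(buf)
    print(n, bytes(buf[:n]))        # 1 b'o'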
def parse_html(self):
    try:
        resolve_ip = ''
        data = []
        filter_list = ['*', '> ', '< ', '{']
        for item in self.result.split("\n"):
            if 'Trying' in item:
                resolve_ip = item.replace('*', "").replace(
                    "Trying", "").replace("...", "").strip()
                log.logger.info('resolve_ip: %s ' % (resolve_ip))
            matching = [s for s in filter_list if s in item[:2]]
            if len(matching) == 0:
                data.append(item.encode('utf-8'))

        parsing_string = b("\r\n").join(data)
        p = HttpParser()
        p.execute(parsing_string, len(parsing_string))

        status_code = str(p.get_status_code())
        header_obj = p.get_headers()
        #body = str(p.recv_body())

        header_list = []
        if resolve_ip:
            header_list.append('%s:%s' % ("resolve ip", resolve_ip.strip()))
        for key, value in header_obj.items():
            header_list.append('%s:%s' % (key, value))
        header = ("<br/>").join(header_list)

        body = self.content["result"]

        log.logger.info('resolve_ip :%s ' % (resolve_ip))
        log.logger.info('status_code :%s ' % (status_code))
        log.logger.info('header :%s ' % (header))
        log.logger.info('body :%s ' % (body))
        return status_code, header, body
    except Exception as e:
        log.logger.info('Exception: %s ' % (str(e)))
        return None, None, str(e)
def _parse_headers(self, data):
    idx = data.find(b("\r\n\r\n"))
    if idx < 0:  # we don't have all headers
        return False

    # Split lines on \r\n keeping the \r\n on each line
    lines = [bytes_to_str(line) + "\r\n"
             for line in data[:idx].split(b("\r\n"))]

    # Parse headers into key/value pairs paying attention
    # to continuation lines.
    while len(lines):
        # Parse initial header name : value pair.
        curr = lines.pop(0)
        if curr.find(":") < 0:
            raise InvalidHeader("invalid line %s" % curr.strip())
        name, value = curr.split(":", 1)
        name = name.rstrip(" \t").upper()
        if HEADER_RE.search(name):
            raise InvalidHeader("invalid header name %s" % name)
        name, value = name.strip(), [value.lstrip()]

        # Consume value continuation lines
        while len(lines) and lines[0].startswith((" ", "\t")):
            value.append(lines.pop(0))
        value = "".join(value).rstrip()

        # multiple headers
        if name in self._headers:
            value = "%s, %s" % (self._headers[name], value)

        # store new header value
        self._headers[name] = value

        # update WSGI environ
        key = "HTTP_%s" % name.upper().replace("-", "_")
        self._environ[key] = value

    # detect now if body is sent by chunks.
    clen = self._headers.get("content-length")
    te = self._headers.get("transfer-encoding", "").lower()

    if clen is not None:
        try:
            self._clen_rest = self._clen = int(clen)
        except ValueError:
            pass
    else:
        self._chunked = te == "chunked"
        if not self._chunked:
            self._clen_rest = MAXSIZE

    # detect encoding and set decompress object
    encoding = self._headers.get("content-encoding")
    if self.decompress:
        if encoding == "gzip":
            self.__decompress_obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        elif encoding == "deflate":
            self.__decompress_obj = zlib.decompressobj()

    rest = data[idx + 4:]
    self._buf = [rest]
    self.__on_headers_complete = True
    return len(rest)
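# Hedged demo of the header folding handled above: a continuation line
# (leading space or tab) is appended to the previous header's value, and a
# repeated header is comma-merged.  The exact whitespace inside the folded
# value depends on which _parse_headers variant is in use.
def demo_folded_headers():
    p = HttpParser()
    raw = b("HTTP/1.1 200 OK\r\n"
            "X-Long: part one\r\n"
            "\tpart two\r\n"
            "Set-Cookie: a=1\r\n"
            "Set-Cookie: b=2\r\n"
            "Content-Length: 0\r\n\r\n")
    p.execute(raw, len(raw))
    h = p.get_headers()
    print(h['x-long'])       # 'part one' joined with 'part two'
    print(h['set-cookie'])   # 'a=1, b=2'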
def execute(self, data, length):
    # end of body can be passed manually by putting a length of 0
    if length == 0:
        self.__on_message_complete = True
        return length

    # start to parse
    nb_parsed = 0
    while True:
        if not self.__on_firstline:
            idx = data.find(b("\r\n"))
            if idx < 0:
                self._buf.append(data)
                return len(data)
            else:
                self.__on_firstline = True
                self._buf.append(data[:idx])
                first_line = bytes_to_str(b("").join(self._buf))
                nb_parsed = nb_parsed + idx + 2

                rest = data[idx + 2:]
                data = b("")
                if self._parse_firstline(first_line):
                    self._buf = [rest]
                else:
                    return nb_parsed
        elif not self.__on_headers_complete:
            if data:
                self._buf.append(data)
                data = b("")

            try:
                to_parse = b("").join(self._buf)
                ret = self._parse_headers(to_parse)
                if not ret:
                    return length
                nb_parsed = nb_parsed + (len(to_parse) - ret)
            except InvalidHeader as e:
                self.errno = INVALID_HEADER
                self.errstr = str(e)
                return nb_parsed
        elif not self.__on_message_complete:
            if not self.__on_message_begin:
                self.__on_message_begin = True

            if data:
                self._buf.append(data)
                data = b("")

            ret = self._parse_body()
            if ret is None:
                return length
            elif ret < 0:
                return ret
            elif ret == 0:
                self.__on_message_complete = True
                return length
            else:
                nb_parsed = max(length, ret)
        else:
            return 0
def recv_body(self):
    """ return last chunk of the parsed body"""
    body = b("").join(self._body)
    self._body = []
    self._partial_body = False
    return body
def _parse_trailers(self, data):
    idx = data.find(b("\r\n\r\n"))

    if data[:2] == b("\r\n"):
        self._trailers = self._parse_headers(data[:idx])
def _parse_headers(self, data):
    idx = data.find(b("\r\n\r\n"))
    if idx < 0:  # we don't have all headers
        if self._status_code == 204 and data == b("\r\n"):
            # a 204 response has no body: the blank line ends the headers
            self._buf = []
            self.__on_headers_complete = True
            return 0
        else:
            return False

    # Split lines on \r\n keeping the \r\n on each line
    lines = [bytes_to_str(line) + "\r\n"
             for line in data[:idx].split(b("\r\n"))]

    # Parse headers into key/value pairs paying attention
    # to continuation lines.
    while len(lines):
        # Parse initial header name : value pair.
        curr = lines.pop(0)
        if curr.find(":") < 0:
            raise InvalidHeader("invalid line %s" % curr.strip())
        name, value = curr.split(":", 1)
        name = name.rstrip(" \t").upper()
        if HEADER_RE.search(name):
            raise InvalidHeader("invalid header name %s" % name)
        if value.endswith("\r\n"):
            value = value[:-2]
        name, value = name.strip(), [value.lstrip()]

        # Consume value continuation lines
        while len(lines) and lines[0].startswith((" ", "\t")):
            curr = lines.pop(0)
            if curr.endswith("\r\n"):
                curr = curr[:-2]
            value.append(curr)
        value = ''.join(value).rstrip()

        # multiple headers
        if name in self._headers:
            value = "%s, %s" % (self._headers[name], value)

        # store new header value
        self._headers[name] = value

        # update WSGI environ
        key = 'HTTP_%s' % name.upper().replace('-', '_')
        self._environ[key] = value

    # detect now if body is sent by chunks.
    clen = self._headers.get('content-length')
    te = self._headers.get('transfer-encoding', '').lower()

    if clen is not None:
        try:
            self._clen_rest = self._clen = int(clen)
        except ValueError:
            pass
    else:
        self._chunked = (te == 'chunked')
        if not self._chunked:
            self._clen_rest = MAXSIZE

    # detect encoding and set decompress object
    encoding = self._headers.get('content-encoding')
    if self.decompress:
        if encoding == "gzip":
            self.__decompress_obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
            self.__decompress_first_try = False
        elif encoding == "deflate":
            self.__decompress_obj = zlib.decompressobj()

    rest = data[idx + 4:]
    self._buf = [rest]
    self.__on_headers_complete = True
    return len(rest)
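# Hedged demo of the 204 special case above: a 204 No Content response ends
# at the blank line, so the parser can complete without a body.  Assumes the
# 204-aware _parse_headers/_parse_body/execute variants shown in this section.
def demo_204():
    p = HttpParser()
    raw = b("HTTP/1.1 204 No Content\r\n\r\n")
    p.execute(raw, len(raw))
    print(p.is_message_complete())   # True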
def execute(self, data, length):
    # end of body can be passed manually by putting a length of 0
    if length == 0:
        self.__on_message_complete = True
        return length

    # start to parse
    nb_parsed = 0
    while True:
        if not self.__on_firstline:
            idx = data.find(b("\r\n"))
            if idx < 0:
                self._buf.append(data)
                return len(data)
            else:
                self.__on_firstline = True
                self._buf.append(data[:idx])
                first_line = bytes_to_str(b("").join(self._buf))
                nb_parsed = nb_parsed + idx + 2

                rest = data[idx + 2:]
                data = b("")
                if self._parse_firstline(first_line):
                    self._buf = [rest]
                else:
                    return nb_parsed
        elif not self.__on_headers_complete:
            if data:
                self._buf.append(data)
                data = b("")

            try:
                to_parse = b("").join(self._buf)
                ret = self._parse_headers(to_parse)
                # _parse_headers returns False when headers are incomplete
                # and 0 when a 204 response ends at the blank line, so the
                # boolean check must not swallow the integer 0
                if type(ret) is bool and not ret:
                    return length
                nb_parsed = nb_parsed + (len(to_parse) - ret)
            except InvalidHeader as e:
                self.errno = INVALID_HEADER
                self.errstr = str(e)
                return nb_parsed
        elif not self.__on_message_complete:
            if not self.__on_message_begin:
                self.__on_message_begin = True

            if data:
                self._buf.append(data)
                data = b("")

            ret = self._parse_body()
            if ret is None:
                return length
            elif ret < 0:
                return ret
            elif ret == 0:
                self.__on_message_complete = True
                return length
            else:
                nb_parsed = max(length, ret)
        else:
            return 0
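# Hedged sketch of incremental parsing with execute(): bytes may arrive in
# arbitrary slices, and each call reports how much it consumed.  Reuses the
# canned response from the earlier demos.
def demo_incremental():
    p = HttpParser()
    raw = b("HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nhello")
    for i in range(0, len(raw), 7):      # feed 7 bytes at a time
        chunk = raw[i:i + 7]
        assert p.execute(chunk, len(chunk)) == len(chunk)
    assert p.is_message_complete()
    print(p.recv_body())                 # b'hello'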