def from_string(data):
    """Parse an HTTP response string into status code, headers and body.

    Based on:
    https://stackoverflow.com/questions/24728088/python-parse-http-response-string

    Args:
        data: The complete raw HTTP response (status line, headers, body)
            as a ``str``. It is encoded as UTF-8 before parsing.

    Returns:
        An ``ODataHttpResponse`` constructed positionally from the parsed
        header list, the integer status code and the body bytes.
    """

    class FakeSocket:
        """Fake socket to simulate received http response content"""

        def __init__(self, response_bytes):
            self._file = BytesIO(response_bytes)

        def makefile(self, *args, **kwargs):
            """Fake file that provides the response content"""
            # pylint: disable=unused-argument
            return self._file

    # Encode once so the size used below is measured in bytes.
    raw = data.encode('utf-8')

    response = HTTPResponse(FakeSocket(raw))
    response.begin()

    # len(raw) is a 'big enough' upper bound for the body: the body can never
    # be longer than the whole encoded message. It must be the *byte* length;
    # len(data) counts characters and could truncate a multi-byte body.
    return ODataHttpResponse(
        response.getheaders(),
        response.status,
        response.read(len(raw)),
    )
def success(self, data: HTTPResponse) -> dict:
    """Summarise a response as a plain dictionary.

    Returns a dict with the keys ``code`` (status), ``reason``, ``headers``
    (the list returned by ``getheaders()``) and ``data`` (the result of
    ``read()``).
    """
    summary = {}
    summary['code'] = data.status
    summary['reason'] = data.reason
    summary['headers'] = data.getheaders()
    summary['data'] = data.read()
    return summary
def processHttpResponse(http_response_text, key_enrich, value_enrich):
    """Re-serialise a raw HTTP response with its headers passed through
    ``modify_http_header``.

    Input that is not longer than four bytes, or that does not begin with
    ``b'HTTP'``, is not parsed and an empty ``bytearray`` is returned.
    Otherwise the result is the status line, followed by whatever
    ``modify_http_header(headers, key_enrich, value_enrich)`` returns,
    followed by the body read from the parsed response.
    """
    rebuilt = bytearray(b'')

    # Guard: only inputs that look like an HTTP response are processed.
    if not len(http_response_text) > 4:
        return rebuilt
    if http_response_text[:4] != b'HTTP':
        return rebuilt

    response = HTTPResponse(HTTPResponseSocket(http_response_text))
    response.begin()

    # http.client reports the protocol version as an int (11 for HTTP/1.1);
    # everything else is written out as HTTP/1.0.
    str_version = "HTTP/1.1" if response.version == 11 else "HTTP/1.0"
    status_line = ' '.join(
        (str_version, str(response.status), str(response.reason))
    ) + '\r\n'

    rebuilt.extend(status_line.encode('utf-8'))
    rebuilt.extend(
        modify_http_header(response.getheaders(), key_enrich, value_enrich))
    rebuilt.extend(response.read())
    return rebuilt
def _parse_http_response(self, response_str):
    """Build an ``HTTPResponse`` from a response string.

    The string is wrapped in a ``TmpSock``, the status line and headers are
    parsed with ``begin()``, the parsed headers are printed, and the
    response object is returned with its body still unread.
    """
    parsed = HTTPResponse(TmpSock(response_str))
    parsed.begin()
    print('res', parsed.getheaders())
    return parsed
def processHttpResponse(http_response_text, key_enrich, value_enrich):
    """Parse raw HTTP response bytes and rebuild them with enriched headers.

    Returns an empty ``bytearray`` unless the input is longer than four
    bytes and starts with ``b'HTTP'``. For such input the returned buffer
    holds the status line, the output of ``modify_http_header`` for the
    parsed headers (given ``key_enrich`` / ``value_enrich``), and the body.
    """
    looks_like_http = (
        len(http_response_text) > 4 and http_response_text[:4] == b'HTTP'
    )
    if not looks_like_http:
        return bytearray(b'')

    parsed = HTTPResponse(HTTPResponseSocket(http_response_text))
    parsed.begin()

    # version == 11 means HTTP/1.1; any other value is emitted as HTTP/1.0.
    if parsed.version == 11:
        str_version = "HTTP/1.1"
    else:
        str_version = "HTTP/1.0"
    first_line = (
        str_version + ' ' + str(parsed.status) + ' ' + str(parsed.reason)
        + '\r\n'
    )

    pieces = (
        first_line.encode('utf-8'),
        modify_http_header(parsed.getheaders(), key_enrich, value_enrich),
        parsed.read(),
    )
    output = bytearray(b'')
    for piece in pieces:
        output.extend(piece)
    return output
def response_length(resp: HTTPResponse):
    """Return the body length of *resp*, or ``resp.length`` when it cannot be
    derived from a Content-Range header.

    For a 206 response that carries a ``Content-Range`` header the length is
    computed from the byte range (``end - start + 1``). In every other case
    ``resp.length`` is returned unchanged.

    Raises:
        RuntimeError: if a 206 response has a Content-Range value that does
            not match the expected ``bytes <start>-<end>`` form.
    """
    # Header names are compared case-insensitively.
    header_map = {name.lower(): value for name, value in resp.getheaders()}

    is_ranged = resp.getcode() == 206 and 'content-range' in header_map
    if not is_ranged:
        return resp.length

    content_range = header_map['content-range']
    parsed = re.match(r' *bytes *(\d+) *- *(\d+)', content_range)
    if parsed is None:
        raise RuntimeError("unexpected content-range: %s" % content_range)

    first, last = (int(group) for group in parsed.groups())
    return last - first + 1
def read_response(self, sock):
    """Read one complete HTTP response from *sock*.

    Args:
        sock: Socket-like object handed to ``HTTPResponse``.

    Returns:
        A ``FakeResponse`` built from the status code, reason phrase,
        headers (as a dict) and the full body.

    Any exception raised while parsing or reading propagates to the caller;
    the response object is closed in either case.
    """
    response = HTTPResponse(sock)
    try:
        response.begin()
        content = response.read()
        status_code = response.status
        reason = response.reason
        headers = dict(response.getheaders())
    finally:
        # Close even when begin()/read() fails so the file object created
        # for the response is not leaked.
        response.close()
    return FakeResponse(status_code, reason, headers, content)
def from_http_client_response(obj: HTTPResponse) -> Response:
    """Convert an ``http.client`` response into a validated ``Response``.

    The body is read from *obj*; a ``str`` body is used as is, a ``bytes``
    body is decoded as UTF-8, and anything else becomes ``None``. The result
    of ``ResponseBuilder.from_dict`` is passed to ``ResponseBuilder.validate``
    before being returned.
    """
    raw_body = obj.read()
    status_code = obj.getcode()
    header_map = dict(obj.getheaders())

    if isinstance(raw_body, str):
        text_body = raw_body
    elif isinstance(raw_body, bytes):
        text_body = raw_body.decode("utf8")
    else:
        text_body = None

    res = ResponseBuilder.from_dict({
        "statusCode": status_code,
        "headers": header_map,
        "body": text_body,
    })
    ResponseBuilder.validate(res)
    return res
def get_body_str(res: HTTPResponse) -> str:
    """Return the body of *res* decoded to text.

    The character set is taken from the ``charset`` parameter of the
    ``Content-Type`` header (header name and parameter name are matched
    case-insensitively, surrounding quotes are removed). UTF-8 is used when
    the header or the parameter is missing.

    Args:
        res: Response whose remaining body is read via ``readlines()``.

    Returns:
        The decoded body.

    Raises:
        LookupError: if the declared charset is not a known codec.
        UnicodeDecodeError: if the body is not valid in the chosen charset.
    """
    # Default up front so the name is always bound, whatever the headers are.
    encoding: str = "utf-8"

    for name, value in res.getheaders():
        if name.lower() != "content-type":
            continue
        # Parameters follow the media type, separated by ';',
        # e.g. "text/html; charset=ISO-8859-1".
        for param in value.split(';')[1:]:
            key, _, raw_charset = param.partition('=')
            charset = raw_charset.strip().strip('"\'')
            if key.strip().lower() == "charset" and charset:
                encoding = charset
                break

    # Decode the whole body in one go: cheaper than repeated string
    # concatenation and safe for encodings whose code units may contain a
    # newline byte.
    return b"".join(res.readlines()).decode(encoding)
def parse_http_response(raw_bytes):
    """Parse raw HTTP response bytes into a ``Response``.

    This leans on the standard library's ``HTTPResponse`` by feeding it a
    ``_FakeSocket`` wrapping the bytes - a somewhat hacky approach that
    should probably be replaced at some point.
    """
    parsed = HTTPResponse(_FakeSocket(raw_bytes))
    parsed.begin()

    status = parsed.status
    header_map = dict(parsed.getheaders())
    # len(raw_bytes) also counts the status line and headers, so this asks
    # for slightly more bytes than the body can contain.
    payload = parsed.read(len(raw_bytes))

    return Response(status_code=status, headers=header_map, body=payload)
def process_file(filename):
    """Extract HTTP responses from a captured stream file and process them.

    Up to 100 MB of the file is read and split wherever a response status
    line prefix (for example ``HTTP/1.1 200``) occurs, because one stream may
    contain several responses. Each candidate is parsed with
    ``HTTPResponse``; candidates that fail to parse are skipped, and every
    parsed response with a non-empty body is passed to ``process_payload``
    together with its headers.

    Updates the module-level counters ``file_cntr``, ``resp_file_cntr``,
    ``inspected_cntr`` and ``valid_cntr``.

    Args:
        filename: Path of the file to examine.
    """
    global file_cntr, resp_file_cntr, inspected_cntr, valid_cntr

    with open(filename, 'rb') as file:
        file_cntr += 1  # increment examined file counter
        data = file.read(100000000)  # 100MB

    # There may be more than one response in one stream, so the bytes are
    # split at every status line prefix. The capture group keeps the prefix
    # in the result. Raw literal: '\d' is not a valid bytes escape; the dot
    # between the version digits is matched literally.
    splitlist = re.split(rb'(HTTP/\d\.\d \d{3})', data)

    # Length has to be odd:
    #   first item  - bytes before the first header (normally empty)
    #   second item - header prefix (HTTP/1.1 200)
    #   third item  - rest of the response
    #   ... and the same pair again for every further response
    if not len(splitlist) % 2:
        sys.stderr.write('Splitlist is even in {}\n'.format(filename))
        sys.exit(1)

    if splitlist[0] == data:
        # No HTTP response found at all.
        return

    # Data in front of the first response (splitlist[0] != b'') is ignored.
    resp_file_cntr += 1

    # Walk over (status prefix, remainder) pairs, one per response.
    for status_prefix, remainder in zip(splitlist[1::2], splitlist[2::2]):
        inspected_cntr += 1

        # Join the prefix with the rest of the response and feed it to
        # HTTPResponse through a fake socket.
        resp = HTTPResponse(FakeSocket(status_prefix + remainder))
        try:
            resp.begin()  # HTTPResponse object has to be initialized
            payload = resp.read()
        except Exception:
            # Best effort: a malformed response is skipped. Only Exception
            # is caught so Ctrl-C and SystemExit are not swallowed.
            continue

        # Now the response is parsed and ready.
        valid_cntr += 1
        if payload != b'':
            process_payload(resp.getheaders(), payload)
def relay(self, upstream: HTTPResponse):
    """Forward an upstream response (headers and body) to the client.

    Headers from *upstream* are passed through ``sanitize_headers``. When
    ``response_length`` yields a length, a ``Content-Length`` header is added
    and the body is written straight to ``self.wfile``; otherwise
    ``Transfer-Encoding: chunked`` is added and the body goes through a
    ``ChunkedEncoder``. A 206 from upstream is reported as 200 when the
    client request had no ``Range`` header. If either side fails mid-transfer
    the error is logged and ``self.close_connection`` is set.
    """
    # Upstream response headers after sanitization.
    out_headers = sanitize_headers(upstream.getheaders())

    # Transport level headers and the matching output sink.
    body_length = response_length(upstream)
    if body_length is None:
        out_headers.append(('Transfer-Encoding', 'chunked'))
        sink = ChunkedEncoder(self.wfile)
    else:
        out_headers.append(('Content-Length', str(body_length)))
        sink = self.wfile

    # Send status line and headers.
    self.log_message("got upstream response. relaying to client...")
    status = upstream.getcode()
    if status == 206 and 'Range' not in self.headers:
        # Translate 206 -> 200 if the downstream is not a range request.
        status = 200
    self.send_response(status)
    for name, value in out_headers:
        self.send_header(name, value)
    self.end_headers()

    # Relay the body piece by piece.
    while True:
        try:
            chunk = upstream.read(self.read_size)
        except (TimeoutError, IncompleteRead) as e:
            self.log_error("upstream closed prematurely: %s" % e)
            self.close_connection = True
            break

        if len(chunk) == 0:
            # EOF: a chunked stream still needs to be finished.
            if isinstance(sink, ChunkedEncoder):
                sink.finish()
            break

        try:
            sink.write(chunk)
        except ConnectionError as e:
            self.log_error("client closed prematurely: %s" % e.strerror)
            self.close_connection = True
            break

    self.log_message("done!")
def do_proxy(self):
    """Forward the current request to the upstream proxy and relay the reply.

    A connection to ``(proxy_host, proxy_port)`` is opened on first use and
    kept in ``self._socket``. The request line, the request headers
    (extended with the ``X-Sogou-*`` verification headers) and, for POST,
    the request body are sent upstream. The upstream response is then
    written to ``self.wfile`` without its ``Transfer-Encoding`` header.
    Socket errors are logged rather than raised.
    """
    try:
        if self._socket is None:
            self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self._socket.connect((proxy_host, proxy_port))

        # sendall() is used throughout: send() may transmit only part of
        # the buffer, which would corrupt the forwarded request.
        self._socket.sendall(self.requestline.encode('ascii') + b'\r\n')
        log.d(self.requestline, ostream)

        # Add Sogou Verification Tags
        self.headers['X-Sogou-Auth'] = x_sogou_auth
        # Current Unix time as lower-case hex, zero-padded to 8 digits.
        t = format(int(time.time()), '08x')
        self.headers['X-Sogou-Tag'] = sogou_hash(t, self.headers['Host'])
        self.headers['X-Sogou-Timestamp'] = t
        self._socket.sendall(str(self.headers).encode('ascii') + b'\r\n')

        # Send POST data
        if self.command == 'POST':
            self._socket.sendall(
                self.rfile.read(int(self.headers['Content-Length'])))

        response = HTTPResponse(self._socket, method=self.command)
        response.begin()

        # Response
        status = 'HTTP/1.1 %s %s' % (response.status, response.reason)
        self.wfile.write(status.encode('ascii') + b'\r\n')

        # Transfer-Encoding is not forwarded.
        h = ''.join(
            hh + ': ' + vv + '\r\n'
            for hh, vv in response.getheaders()
            if hh.upper() != 'TRANSFER-ENCODING'
        )
        self.wfile.write(h.encode('ascii') + b'\r\n')

        while True:
            response_data = response.read(8192)
            if len(response_data) == 0:
                break
            self.wfile.write(response_data)
    except socket.error:
        log.e('Socket error for ' + self.requestline, ostream)
def _update_results(self, resp: HTTPResponse, success: bool):
    """Record the outcome of an HTTP exchange in ``self.results``.

    Stores the success flag, status, reason, version, body and body length
    under ``http.*`` keys, then one ``http.resp.header.<name>`` entry per
    response header (name lower-cased, spaces and dashes replaced by
    underscores). A header key that already exists in the results is turned
    into a list holding all of its values.
    """
    results = self.results

    results['http.success'] = success
    results['http.resp.status'] = resp.status
    results['http.resp.reason'] = resp.reason
    results['http.resp.version'] = resp.version

    body = resp.read()
    results['http.resp.body'] = body
    results['http.resp.body_length'] = len(body)

    for header_name, value in resp.getheaders():
        normalized = header_name.replace(' ', '_').replace('-', '_').lower()
        key = 'http.resp.header.' + normalized

        if key not in results:
            results[key] = value
        elif isinstance(results[key], list):
            results[key].append(value)
        else:
            # Second occurrence: make it a list.
            results[key] = [results[key], value]

        self.log.debug(f"Found response header: {key}: {value}")
def _headers(self, res: HTTPResponse) -> Dict[str, str]:
    """Return the headers of *res* as a dict (a repeated name keeps its last value)."""
    return {name: value for name, value in res.getheaders()}
def get_headers_str(res: HTTPResponse) -> str:
    """Render the status code and headers of *res* as text.

    The first line is ``HTTP <code>``; each header follows as
    ``<name>: <value>``. Every line, including the last, ends with a newline.
    """
    lines = ["HTTP " + str(res.getcode())]
    lines.extend(name + ': ' + value for name, value in res.getheaders())
    return '\n'.join(lines) + '\n'
def set_ResponseHeaders(self, response: HTTPResponse) -> None:
    '''
    Stores the headers of a URL response on this object.

    The header pairs from ``response.getheaders()`` are converted to a dict
    and assigned to ``self.responseHeaders``.

    :param response: A response from a url request.
    '''
    header_pairs = response.getheaders()
    self.responseHeaders = dict(header_pairs)