def process_record(self, record):
    """Parse one archived HTTP record and yield items extracted from its HTML body.

    The record payload is parsed as a raw HTTP response. Only ``200``
    responses whose ``Content-Type`` contains ``text/html`` and whose stripped
    body is non-empty are passed on to ``self.process_content``.

    :param record: object exposing ``payload.read()`` (raw response bytes) and
        ``url``.
    :return: generator over whatever ``self.process_content`` yields; empty
        when the record is skipped or fails to parse.
    """
    content = None
    try:
        payload = record.payload.read()
        s = FakeSocket(payload)
        response = HTTPResponse(s)
        response.begin()
        if response.status != 200:
            return
        content_type = response.getheader('Content-Type', '')
        if 'text/html' not in content_type:
            return
        headers = response.getheaders()
        # len(payload) is an upper bound for the body: headers are included
        # in the payload length.
        content = response.read(len(payload))
    except Exception:
        # Best-effort: count the failure, log the traceback, skip the record.
        self.increment_counter('errors', 'process_record', 1)
        # logging uses %-style lazy formatting; a '{}' placeholder would make
        # the handler fail to render the message.
        logging.error('Error processing record: %s', traceback.format_exc())
        return

    if content is not None:
        content = content.strip()
    if not content:
        return
    for item in self.process_content(record.url, headers, content):
        yield item
def do_proxy(self):
    """Forward the current request to the upstream proxy and relay its reply.

    Connects to ``(proxy_host, proxy_port)`` on first use, sends the request
    line, the headers (with the three ``X-Sogou-*`` tags added) and, for POST,
    the request body. The upstream status line, headers (minus
    ``Transfer-Encoding``) and body are then written to ``self.wfile``.

    Socket errors are reported on stdout and otherwise swallowed.
    """
    try:
        if self.s == 0:
            # 0 is the "no upstream connection yet" sentinel checked here.
            self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.s.connect((proxy_host, proxy_port))
        # sendall: plain send() may transmit only part of the buffer.
        self.s.sendall(self.requestline.encode('ascii') + b'\r\n')
        # Add Sogou Verification Tags
        self.headers['X-Sogou-Auth'] = x_sogou_auth
        # 8-digit hex timestamp; rstrip('L') drops a Python 2 long suffix.
        t = hex(int(time.time()))[2:].rstrip('L').zfill(8)
        self.headers['X-Sogou-Tag'] = calc_sogou_hash(t, self.headers['Host'])
        self.headers['X-Sogou-Timestamp'] = t
        self.s.sendall(str(self.headers).encode('ascii') + b'\r\n')
        # Send Post data
        if self.command == 'POST':
            self.s.sendall(self.rfile.read(int(self.headers['Content-Length'])))
        # NOTE(review): `buffering` is an httplib (Python 2.7) argument;
        # confirm which HTTPResponse class this file imports.
        response = HTTPResponse(self.s, method=self.command, buffering=True)
        response.begin()
        # Reply to the browser
        status = 'HTTP/1.1 ' + str(response.status) + ' ' + response.reason
        self.wfile.write(status.encode('ascii') + b'\r\n')
        # Transfer-Encoding is dropped because response.read() below already
        # returns the decoded body.
        h = ''.join(
            hh + ': ' + vv + '\r\n'
            for hh, vv in response.getheaders()
            if hh.upper() != 'TRANSFER-ENCODING'
        )
        self.wfile.write(h.encode('ascii') + b'\r\n')
        while True:
            response_data = response.read(8192)
            if len(response_data) == 0:
                break
            self.wfile.write(response_data)
    except socket.error:
        print('socket error for ' + self.requestline)
def parseNewResponse(self, packetString, src, dst, tcp):
    """Inspect the first packet of an HTTP response and dispatch body parsing.

    Only ``200`` responses that already contain the header/body separator are
    handled. When the response is ``text/html``, the body is handed to
    ``parseFixedLengthResponse`` (``Content-Length`` present and non-zero) or
    ``parseChunkedResponse`` (``Transfer-Encoding: chunked``).

    :param packetString: raw response text starting at the status line.
    :param src: source endpoint, used with ``dst`` as the flow key.
    :param dst: destination endpoint.
    :param tcp: passed through unchanged to the body parsers.
    """
    # Characters 9..11 of "HTTP/1.x NNN ..." are the status code.
    responseCode = packetString[9:12]
    if responseCode != '200':  # just okay responses for now
        return
    if '\r\n\r\n' not in packetString:  # only proceed if the response has a body
        print("body not found")
        return

    bodyIndex = packetString.index('\r\n\r\n') + 4
    body = packetString[bodyIndex:]
    response = HTTPResponse(FakeSocket(packetString))
    response.begin()
    headerArray = response.getheaders()
    flowKey = (src, dst)
    for item in headerArray:
        # Header names are compared case-insensitively so the check does not
        # depend on how the HTTP library normalises them.
        if item[0].lower() == 'content-type' and 'text/html' in item[1]:
            print(headerArray)
            for header in headerArray:
                headerName = header[0].lower()
                if headerName == 'content-length':
                    print('found fixed length')
                    length = int(header[1])
                    # Value comparison; `is not 0` tested object identity.
                    if length != 0:
                        self.parseFixedLengthResponse(flowKey, body, length, src, dst, tcp, responseCode)
                    else:
                        print("warning, content-length is zero!")
                elif headerName == 'transfer-encoding' and header[1] == 'chunked':
                    print('found chunked')
                    self.parseChunkedResponse(flowKey, body, src, dst, tcp, responseCode)
def issue_row(raw_row):
    """Flatten one issue XML element into a dict of column name -> text.

    Every column in ``COLUMN_HEADERS`` with non-empty text is copied; the
    three HTML-bearing columns go through ``htmltext`` first. Values longer
    than 32000 characters are truncated and marked. When present, the
    base64-encoded request and response are decoded and their headers are
    added as ``requestHeaders`` / ``responseHeaders``.

    :param raw_row: element-like object providing ``findtext(path)``.
    :return: dict with one entry per populated column.
    """
    row = {}
    for column in COLUMN_HEADERS:
        column_data_raw = raw_row.findtext(column)
        if not column_data_raw:
            continue
        if column in ["issueDetail", "issueBackground", "remediationBackground"]:
            row[column] = htmltext(column_data_raw)
        else:
            row[column] = column_data_raw
        if len(row[column]) > 32000:
            # str.join takes a single iterable; the two-argument call raised
            # TypeError, so concatenate instead.
            row[column] = row[column][:32000] + " [Text Cut Due To Length]"

    request = raw_row.findtext("./requestresponse/request")
    if request:
        parsed_request = HTTPRequest(binascii.a2b_base64(request))
        formatted_request_a = "command : {}\nuri : {}\nrequest_version : {}".format(
            parsed_request.command, parsed_request.path, parsed_request.request_version
        )
        formatted_request_b = "\n".join(
            "{}: {}".format(header, parsed_request.headers[header])
            for header in parsed_request.headers.keys()
        )
        row["requestHeaders"] = "{}\n{}".format(formatted_request_a, formatted_request_b)

    response = raw_row.findtext("./requestresponse/response")
    if response:
        parsed_response = HTTPResponse(FakeSocket(binascii.a2b_base64(response)))
        parsed_response.begin()
        formatted_response = "\n".join(
            ["{} : {}".format(header_item[0], header_item[1]) for header_item in parsed_response.getheaders()]
        )
        row["responseHeaders"] = formatted_response
    return row
def getheaders(self):
    """Return all headers from the response

    This gives the ability to access all HTTP headers of a discovery response.
    The call is delegated unchanged to ``HTTPResponse.getheaders``.

    NOTE(review): if ``HTTPResponse`` is the standard-library class, the
    result is a list of ``(header, value)`` tuples rather than a dict --
    confirm against the import at the top of the file.

    :return: list[tuple[str, str]] (assumed, see note above)
    """
    return HTTPResponse.getheaders(self)
def getheaders(self):
    """Return all headers from the response

    This gives the ability to access all HTTP headers of a discovery response.
    The call is delegated unchanged to ``HTTPResponse.getheaders``.

    NOTE(review): if ``HTTPResponse`` is the standard-library class, the
    result is a list of ``(header, value)`` tuples rather than a dict --
    confirm against the import at the top of the file.

    :return: list[tuple[str, str]] (assumed, see note above)
    """
    return HTTPResponse.getheaders(self)
def __init__(self, response_text):
    """Parse raw HTTP response text and record its headers on the instance.

    :param response_text: raw response, starting at the status line.
    """
    self.fp = FakeSocket(response_text)
    res = HTTPR(self.fp)
    res.begin()
    # NOTE(review): self.headers is not created here; presumably it is
    # defined on the class -- confirm it is not shared between instances.
    for name, value in res.getheaders():
        self.headers[name] = value
    self.length = res.getheader('Content-Length')
    self.chunked = res.getheader('Transfer-Encoding')
def parse_response_header(seg):
    """Parse a raw HTTP response head into a flat dict.

    :param seg: raw response data starting at the status line.
    :return: dict with ``status``, ``response_version`` and one entry per
        response header (header name -> value).
    :raises KeyError: if the parsed version is neither 10 nor 11.
    """
    response = HTTPResponse(FakeSocket(seg))
    response.begin()
    # HTTPResponse.version is an int; map it back to the protocol string.
    version_names = {10: 'HTTP/1.0', 11: 'HTTP/1.1'}
    res_data = {
        'status': response.status,
        'response_version': version_names[response.version],
    }
    for name, value in response.getheaders():
        res_data[str(name)] = value
    return res_data
def proccessHttpResponse(self, burpHttpReqResp):
    """Collect CSP findings from the headers of one HTTP response.

    Only headers whose lower-cased name is listed in
    ``ContentSecurityPolicy.HEADERS`` are analysed.

    :param burpHttpReqResp: object whose ``getResponse()`` returns the raw
        response bytes.
    :return: list of findings gathered from ``parseContentSecurityPolicy``.
    """
    rawResponse = burpHttpReqResp.getResponse()
    parsed = HTTPResponse(HttpDummySocket(bytearray(rawResponse)))
    parsed.begin()

    issues = []
    for header in parsed.getheaders():
        if header[0].lower() not in ContentSecurityPolicy.HEADERS:
            continue
        issues.extend(self.parseContentSecurityPolicy(header, burpHttpReqResp))
    return issues
def proccessHttpResponse(self, burpHttpReqResp):
    """Collect CSP findings from the headers of one HTTP response.

    Only headers whose lower-cased name is listed in
    ``ContentSecurityPolicy.HEADERS`` are analysed.

    :param burpHttpReqResp: object whose ``getResponse()`` returns the raw
        response bytes.
    :return: list of findings gathered from ``parseContentSecurityPolicy``.
    """
    rawResponse = burpHttpReqResp.getResponse()
    parsed = HTTPResponse(HttpDummySocket(bytearray(rawResponse)))
    parsed.begin()

    issues = []
    for header in parsed.getheaders():
        if header[0].lower() not in ContentSecurityPolicy.HEADERS:
            continue
        issues.extend(self.parseContentSecurityPolicy(header, burpHttpReqResp))
    return issues
def parse_response(response_text):
    """
    Build a requests.Response from an HTTP status line plus headers.

    Only ``status_code`` and ``headers`` are populated on the result.
    """
    class _TextSocket():
        """Socket stand-in exposing only makefile(), backed by a StringIO."""

        def __init__(self, raw_text):
            self._file = StringIO(raw_text)

        def makefile(self, *args, **kwargs):
            return self._file

    parsed = HTTPResponse(_TextSocket(response_text))
    parsed.begin()

    result = requests.Response()
    result.status_code = parsed.status
    result.headers = CaseInsensitiveDict(parsed.getheaders())
    return result
def parse_response(response_text):
    """
    Build a requests.Response from an HTTP status line plus headers.

    Only ``status_code`` and ``headers`` are populated on the result.
    """
    class _TextSocket():
        """Socket stand-in exposing only makefile(), backed by a StringIO."""

        def __init__(self, raw_text):
            self._file = StringIO(raw_text)

        def makefile(self, *args, **kwargs):
            return self._file

    parsed = HTTPResponse(_TextSocket(response_text))
    parsed.begin()

    result = requests.Response()
    result.status_code = parsed.status
    result.headers = CaseInsensitiveDict(parsed.getheaders())
    return result
def parse_response(response_text):
    """
    Build a requests.Response from an HTTP status line plus headers.

    Only ``status_code`` and ``headers`` are populated. The ``x-robots-tag``
    entry is always rewritten as the ';'-joined list of every X-Robots-Tag
    header found (an empty string when there is none).
    """
    class _TextSocket():
        """Socket stand-in exposing only makefile(), backed by a StringIO."""

        def __init__(self, raw_text):
            self._file = StringIO(raw_text)

        def makefile(self, *args, **kwargs):
            return self._file

    parsed = HTTPResponse(_TextSocket(response_text))
    parsed.begin()

    result = requests.Response()
    result.status_code = parsed.status
    header_map = CaseInsensitiveDict(parsed.getheaders())

    # Several X-Robots-Tag headers may be present, one per user agent:
    # https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag
    #   X-Robots-Tag: googlebot: nofollow
    #   X-Robots-Tag: otherbot: noindex, nofollow
    # They are joined with a semicolon rather than a comma so the per-agent
    # directives can be separated again (commas already occur inside a
    # directive). As of 12/14/16 no spec appeared to cover this; since parsed
    # response headers are not archived, the convenience is acceptable, but
    # the situation is worth keeping track of.
    robots_directives = [
        raw_line.split(": ", 1)[1].replace("\n", "").replace("\r", "")
        for raw_line in parsed.msg.getallmatchingheaders('x-robots-tag')
    ]
    header_map['x-robots-tag'] = ";".join(robots_directives)

    result.headers = header_map
    return result
def parse_response(response_text):
    """
    Build a requests.Response from an HTTP status line plus headers.

    Only ``status_code`` and ``headers`` are populated. The ``x-robots-tag``
    entry is always rewritten as the ';'-joined list of every X-Robots-Tag
    header found (an empty string when there is none).
    """
    class _TextSocket():
        """Socket stand-in exposing only makefile(), backed by a StringIO."""

        def __init__(self, raw_text):
            self._file = StringIO(raw_text)

        def makefile(self, *args, **kwargs):
            return self._file

    parsed = HTTPResponse(_TextSocket(response_text))
    parsed.begin()

    result = requests.Response()
    result.status_code = parsed.status
    header_map = CaseInsensitiveDict(parsed.getheaders())

    # Several X-Robots-Tag headers may be present, one per user agent:
    # https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag
    #   X-Robots-Tag: googlebot: nofollow
    #   X-Robots-Tag: otherbot: noindex, nofollow
    # They are joined with a semicolon rather than a comma so the per-agent
    # directives can be separated again (commas already occur inside a
    # directive). As of 12/14/16 no spec appeared to cover this; since parsed
    # response headers are not archived, the convenience is acceptable, but
    # the situation is worth keeping track of.
    robots_directives = [
        raw_line.split(": ", 1)[1].replace("\n", "").replace("\r", "")
        for raw_line in parsed.msg.getallmatchingheaders('x-robots-tag')
    ]
    header_map['x-robots-tag'] = ";".join(robots_directives)

    result.headers = header_map
    return result
def issue_row(raw_row):
    """Flatten one issue XML element into a dict of column name -> text.

    Every column in ``COLUMN_HEADERS`` with non-empty text is copied; the
    three HTML-bearing columns go through ``htmltext`` first. Values longer
    than 32000 characters are truncated and marked. When present, the
    base64-encoded request and response are decoded and their headers are
    added as ``requestHeaders`` / ``responseHeaders``.

    :param raw_row: element-like object providing ``findtext(path)``.
    :return: dict with one entry per populated column.
    """
    row = {}
    for column in COLUMN_HEADERS:
        column_data_raw = raw_row.findtext(column)
        if not column_data_raw:
            continue
        if column in [
                'issueDetail', 'issueBackground', 'remediationBackground'
        ]:
            row[column] = htmltext(column_data_raw)
        else:
            row[column] = column_data_raw
        if len(row[column]) > 32000:
            # str.join takes a single iterable; the two-argument call raised
            # TypeError, so concatenate instead.
            row[column] = row[column][:32000] + " [Text Cut Due To Length]"

    request = raw_row.findtext('./requestresponse/request')
    if request:
        parsed_request = HTTPRequest(binascii.a2b_base64(request))
        formatted_request_a = "command : {}\nuri : {}\nrequest_version : {}".format(
            parsed_request.command, parsed_request.path,
            parsed_request.request_version)
        formatted_request_b = "\n".join(
            "{}: {}".format(header, parsed_request.headers[header])
            for header in parsed_request.headers.keys())
        row['requestHeaders'] = "{}\n{}".format(formatted_request_a,
                                                formatted_request_b)

    response = raw_row.findtext('./requestresponse/response')
    if response:
        parsed_response = HTTPResponse(
            FakeSocket(binascii.a2b_base64(response)))
        parsed_response.begin()
        formatted_response = "\n".join([
            "{} : {}".format(header_item[0], header_item[1])
            for header_item in parsed_response.getheaders()
        ])
        row['responseHeaders'] = formatted_response
    return row
def run(self):
    """Send ``self.clientSendData`` to ``self.ip`` and capture the reply.

    The response headers (minus ``Transfer-Encoding``) and whatever further
    data the socket delivers are appended to ``self.clientRecvData``; the
    status code is stored in ``self.status``. Any failure is recorded in
    ``self.error`` and printed instead of being raised.
    """
    remote = None
    try:
        remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # NOTE(review): this reads a global `port` while the error message
        # below uses self.port -- confirm which one is intended.
        remote.connect((self.ip, int(port)))
        remote.send(self.clientSendData)
        response = HTTPResponse(remote)
        response.begin()
        for hh, vv in response.getheaders():
            if hh.upper() != 'TRANSFER-ENCODING':
                self.clientRecvData += hh + ': ' + vv + '\r\n'
        self.clientRecvData += "\r\n"
        self.status = response.status
        print(self.ip + " response: %d" % (response.status))
        # Drain the raw socket until the peer closes the connection.
        while True:
            d = remote.recv(MAX_RECV)
            if len(d) == 0:
                break
            self.clientRecvData += d
    except:
        # Deliberately broad: every failure is recorded on the instance.
        exc_type, self.error, exc_traceback = sys.exc_info()
        print(self.ip + ":" + self.port + " error: ", exc_type, self.error)
    finally:
        # The socket was previously leaked on both success and error paths.
        if remote is not None:
            remote.close()
    sys.stdout.flush()
def run(self):
    """Send ``self.clientSendData`` to ``self.ip`` and capture the reply.

    The response headers (minus ``Transfer-Encoding``) and whatever further
    data the socket delivers are appended to ``self.clientRecvData``; the
    status code is stored in ``self.status``. Any failure is recorded in
    ``self.error`` and printed instead of being raised.
    """
    remote = None
    try:
        remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # NOTE(review): this reads a global `port` while the error message
        # below uses self.port -- confirm which one is intended.
        remote.connect((self.ip, int(port)))
        remote.send(self.clientSendData)
        response = HTTPResponse(remote)
        response.begin()
        for hh, vv in response.getheaders():
            if hh.upper() != 'TRANSFER-ENCODING':
                self.clientRecvData += hh + ': ' + vv + '\r\n'
        self.clientRecvData += "\r\n"
        self.status = response.status
        print(self.ip + " response: %d" % (response.status))
        # Drain the raw socket until the peer closes the connection.
        while True:
            d = remote.recv(MAX_RECV)
            if len(d) == 0:
                break
            self.clientRecvData += d
    except:
        # Deliberately broad: every failure is recorded on the instance.
        exc_type, self.error, exc_traceback = sys.exc_info()
        print(self.ip + ":" + self.port + " error: ", exc_type, self.error)
    finally:
        # The socket was previously leaked on both success and error paths.
        if remote is not None:
            remote.close()
    sys.stdout.flush()
def proxy(self):
    """Handle one plain-HTTP proxy request (Python 2 code).

    Either answers with a 302 redirect, or connects upstream, forwards the
    request line, headers and POST body, and streams the reply back through
    ``self.wfile``. Hosts that reject the injected leading blank lines are
    added to ``domainWhiteList`` and retried; failures are reported through
    ``self.netlog``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost -- confirm block boundaries against the
    original file before relying on them.
    """
    doInject = False
    if gOptions.log > 0: print self.requestline
    port = 80
    host = self.headers["Host"]
    # Split an explicit "host:port" Host header.
    if host.find(":") != -1:
        port = int(host.split(":")[1])
        host = host.split(":")[0]
    errpath = ""

    try:
        # Unwrap redirector URLs: while the URL's host is a configured
        # redirect domain, follow the target carried in its query string.
        redirectUrl = self.path
        while True:
            (scm, netloc, path, params, query, _) = urlparse.urlparse(redirectUrl)
            if gOptions.log > 2: print urlparse.urlparse(redirectUrl)

            if (netloc not in gConfig["REDIRECT_DOMAINS"]):
                break
            prefixes = gConfig["REDIRECT_DOMAINS"][netloc].split('|')
            found = False
            for prefix in prefixes:
                prefix = prefix + "="
                for param in query.split('&') :
                    if param.find(prefix) == 0:
                        print "redirect to " + urllib.unquote(param[len(prefix):])
                        redirectUrl = urllib.unquote(param[len(prefix):])
                        found = True
                        continue
            if not found:
                break

        # self.path[7:] drops the leading "http://".
        if (host in gConfig["HSTS_DOMAINS"]):
            redirectUrl = "https://" + self.path[7:]

        #redirect
        if (redirectUrl != self.path):
            status = "HTTP/1.1 302 Found"
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: " + redirectUrl + "\r\n")
            self.wfile.close()
            return

        # Remove http://[host] , for google.com.hk
        path = self.path[self.path.find(netloc) + len(netloc):]

        if host in gConfig["BLOCKED_DOMAINS"]:
            host = gConfig["PROXY_SERVER_SIMPLE"]
            path = self.path[len(scm)+2:]
            self.headers["Host"] = gConfig["PROXY_SERVER_SIMPLE"]
            print "use simple web proxy for " + path

        self.lastHost = self.headers["Host"]

        # Retry loop: re-entered via `continue` after a failed injection.
        while True:
            inWhileList = False
            for d in domainWhiteList:
                if host.endswith(d):
                    if gOptions.log > 1: print host + " in domainWhiteList: " + d
                    inWhileList = True

            connectHost = host
            if not inWhileList:
                connectHost = self.getip(host)
                if connectHost in gConfig["BLOCKED_IPS"]:
                    gConfig["BLOCKED_DOMAINS"][host] = True
                    host = gConfig["PROXY_SERVER_SIMPLE"]
                    path = self.path[len(scm)+2:]
                    self.headers["Host"] = gConfig["PROXY_SERVER_SIMPLE"]
                    connectHost = self.getip(host)
                    print "use simple web proxy for " + path
                doInject = self.enableInjection(host, connectHost)

            if self.remote is None or self.lastHost != self.headers["Host"]:
                self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                if gOptions.log > 1: print "connect to " + host + ":" + str(port)
                self.remote.connect((connectHost, port))
                if doInject:
                    if gOptions.log > 0: print "inject http for "+host
                    # "Injection": blank lines sent ahead of the real request.
                    self.remote.send("\r\n\r\n")
            # Send requestline
            if path == "":
                path = "/"
            print " ".join((self.command, path, self.request_version)) + "\r\n"
            self.remote.send(" ".join((self.command, path, self.request_version)) + "\r\n")

            # Send headers
            self.remote.send(str(self.headers) + "\r\n")
            # Send Post data
            if(self.command=='POST'):
                self.remote.send(self.rfile.read(int(self.headers['Content-Length'])))
            response = HTTPResponse(self.remote, method=self.command)
            badStatusLine = False
            msg = "http405"
            try :
                response.begin()
                print host + " response: %d"%(response.status)
                msg = "http%d"%(response.status)
            except BadStatusLine:
                print host + " response: BadStatusLine"
                msg = "badStatusLine"
                badStatusLine = True
            except:
                raise

            # Injection rejected: whitelist the host and retry without it.
            if doInject and (response.status == 400 or response.status == 405 or badStatusLine) and host != gConfig["PROXY_SERVER_SIMPLE"]:
                self.remote.close()
                self.remote = None
                domainWhiteList.append(host)
                errpath = (msg + "/host/" + host)
                continue
            break
        # Reply to the browser
        status = "HTTP/1.1 " + str(response.status) + " " + response.reason
        self.wfile.write(status + "\r\n")
        h = ''
        for hh, vv in response.getheaders():
            if hh.upper()!='TRANSFER-ENCODING':
                h += hh + ': ' + vv + '\r\n'
        self.wfile.write(h + "\r\n")

        dataLength = 0
        while True:
            response_data = response.read(8192)
            if(len(response_data) == 0): break
            # A short first chunk carrying one of these error titles is
            # replaced by the configured reload page.
            if dataLength == 0 and (len(response_data) <= 320):
                if response_data.find("<title>400 Bad Request") != -1 or response_data.find("<title>501 Method Not Implemented") != -1:
                    print host + " not supporting injection"
                    domainWhiteList.append(host)
                    response_data = gConfig["PAGE_RELOAD_HTML"]
            self.wfile.write(response_data)
            dataLength += len(response_data)
        if gOptions.log > 1: print "data length: %d"%dataLength
        self.wfile.close()
    except:
        if self.remote:
            self.remote.close()
            self.remote = None

        exc_type, exc_value, exc_traceback = sys.exc_info()

        if exc_type == socket.error:
            # The errno is recovered by parsing str(exc_value).
            code, msg = str(exc_value).split('] ')
            code = code[1:].split(' ')[1]
            if code in ["32", "10053"]: #errno.EPIPE, 10053 is for Windows
                if gOptions.log > 0: print "Detected remote disconnect: " + host
                self.wfile.close()
                return
            if code in ["61"]: #server not support injection
                if doInject:
                    print "try not inject " + host
                    domainWhiteList.append(host)
                    self.proxy()
                    return

        print "error in proxy: ", self.requestline
        print exc_type
        print str(exc_value) + " " + host
        errpath = "unkown/host/" + host
        if exc_type == socket.timeout or (exc_type == socket.error and code in ["60", "110", "10060"]): #timed out, 10060 is for Windows
            if gOptions.log > 0: print "add "+host+" to blocked domains"
            gConfig["BLOCKED_DOMAINS"][host] = True
            self.wfile.write("HTTP/1.1 200 OK\r\n\r\n")
            self.wfile.write(gConfig["PAGE_RELOAD_HTML"])
            return

        traceback.print_tb(exc_traceback)
        (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path)
        status = "HTTP/1.1 302 Found"
        if (netloc != urlparse.urlparse( gConfig["PROXY_SERVER"] )[1] and doInject):
            self.wfile.write(status + "\r\n")
            redirectUrl = gConfig["PROXY_SERVER"] + self.path[7:]
            if host in gConfig["HSTS_ON_EXCEPTION_DOMAINS"]:
                redirectUrl = "https://" + self.path[7:]
            self.wfile.write("Location: " + redirectUrl + "\r\n")
        else:
            if (scm.upper() != "HTTP"):
                msg = "schme-not-supported"
            else:
                msg = "web-proxy-fail"
            errpath = ("error/host/" + host + "/?msg=" + msg)
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: http://westchamberproxy.appspot.com/#" + msg + "\r\n")
    # NOTE(review): placed at function level on the assumption that errpath
    # set on the success path must also be logged -- confirm.
    self.wfile.close()
    print "client connection closed"
    if errpath != "":
        self.netlog(errpath)
def proxy(self):
    """Handle one plain-HTTP proxy request (Python 2 code).

    Requests addressed to 127.0.0.1/localhost are served by a small built-in
    configuration page (``/save``, ``/add``, or the rendered ``index.html``).
    All other requests are forwarded upstream -- directly, or through the
    configured SOCKS5/HTTP proxy when the host or its IP is blocked -- and
    the reply is streamed back through ``self.wfile``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost -- confirm block boundaries against the
    original file before relying on them.
    """
    inWhileList = False
    logging.info (self.requestline)
    port = 80
    host = self.headers["Host"]
    # Split an explicit "host:port" Host header.
    if host.find(":") != -1:
        port = int(host.split(":")[1])
        host = host.split(":")[0]
    (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path)

    # Built-in configuration UI.
    if host in ["127.0.0.1", "localhost"]:
        basedir = os.path.dirname(__file__)
        htmlTemplate = os.path.join(basedir, "index.html")
        htmlFile = open(htmlTemplate)
        html = htmlFile.read()
        htmlFile.close()
        status = "HTTP/1.1 200 OK"
        if path == "/save":
            postData = self.rfile.read(int(self.headers['Content-Length']))
            data = urlparse.parse_qs(postData)
            logging.info(str(data))
            key = data["id"][0]
            value = data["value"][0]
            if key in gConfig:
                # Booleans are parsed from "true"/"false"; any other value is
                # coerced with the type of the existing setting.
                if type(gConfig[key]) == type(True):
                    if value == "true": gConfig[key] = True
                    if value == "false": gConfig[key] = False
                else:
                    gConfig[key] = type(gConfig[key]) (value)
            self.wfile.write(status + "\r\n\r\n" + value)
            return
        if path == "/add":
            postData = self.rfile.read(int(self.headers['Content-Length']))
            data = urlparse.parse_qs(postData)
            if "BLOCKED_DOMAINS" in data:
                domain = data["BLOCKED_DOMAINS"][0]
                # Accept a full URL and keep only its host part.
                if domain[:4] == "http":
                    (scm, netloc, path, params, query, _) = urlparse.urlparse(domain)
                    domain = netloc
                gConfig["BLOCKED_DOMAINS"][domain] = True

            self.wfile.write("HTTP/1.1 302 FOUND\r\n" + "Location: /\r\n\r\n" + domain)
            return

        # Fill the "{KEY}" placeholders of the template from gConfig.
        for key in gConfig:
            if type(gConfig[key]) in [str,int] :
                html = html.replace("{"+key+"}", str(gConfig[key]))
            else :
                html = html.replace("{" + key + "}", str(gConfig[key]))
        self.wfile.write(status + "\r\n\r\n" + html)
        return
    try:
        # self.path[7:] drops the leading "http://".
        if (host in gConfig["HSTS_DOMAINS"]):
            redirectUrl = "https://" + self.path[7:]
            #redirect
            status = "HTTP/1.1 302 Found"
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: " + redirectUrl + "\r\n\r\n")
            return

        for d in domainWhiteList:
            if host.endswith(d):
                logging.info (host + " in domainWhiteList: " + d)
                inWhileList = True

        connectHost = self.getip(host)

        # Remove http://[host] , for google.com.hk
        path = self.path[self.path.find(netloc) + len(netloc):]
        if path == "":
            path = "/"

        if isDomainBlocked(host) or isIpBlocked(connectHost):
            if gConfig['PROXY_TYPE'] == 'socks5':
                self.remote = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
            else:
                # HTTP proxy: connect to the proxy and send the absolute URL.
                self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                connectHost = gConfig['HTTP_PROXY']
                port = gConfig['HTTP_PROXY_PORT']
                path = self.path
        else:
            self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        logging.debug( host + ":connect to " + connectHost + ":" + str(port))
        self.remote.connect((connectHost, port))
        print " ".join((self.command, self.path, self.request_version)) + "\r\n"
        self.remote.send(" ".join((self.command, path, self.request_version)) + "\r\n" + str(self.headers) + "\r\n")
        # Send Post data
        if(self.command=='POST'):
            self.remote.send(self.rfile.read(int(self.headers['Content-Length'])))
        response = HTTPResponse(self.remote, method=self.command)
        badStatusLine = False
        try :
            response.begin()
            print host + " response: %d"%(response.status)
        except:
            self.remote.close()
            self.remote = None
            raise

        # Reply to the browser
        status = "HTTP/1.1 " + str(response.status) + " " + response.reason
        self.wfile.write(status + "\r\n")
        h = ''
        for hh, vv in response.getheaders():
            if hh.upper()!='TRANSFER-ENCODING':
                h += hh + ': ' + vv + '\r\n'
        self.wfile.write(h + "\r\n")

        dataLength = 0
        while True:
            response_data = response.read(8192)
            if(len(response_data) == 0): break
            self.wfile.write(response_data)
            dataLength += len(response_data)
        logging.debug( "data length: %d"%dataLength)
        self.remote.close()
        self.remote = None
    except:
        if self.remote:
            self.remote.close()
            self.remote = None

        (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path)
        status = "HTTP/1.1 302 Found"
        if host in gConfig["HSTS_ON_EXCEPTION_DOMAINS"]:
            redirectUrl = "https://" + self.path[7:]
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: " + redirectUrl + "\r\n")

        exc_type, exc_value, exc_traceback = sys.exc_info()
        if exc_type == socket.error:
            # exc_value[0] is used as the numeric errno.
            code = exc_value[0]
            if code in [32, 10053]: #errno.EPIPE, 10053 is for Windows
                logging.info ("Detected remote disconnect: " + host)
                return
            if code in [54, 10054]: #reset
                logging.info(host + ": reset from " + connectHost)
                gConfig["BLOCKED_IPS"][connectHost] = True
                return

        print "error in proxy: ", self.requestline, exc_type
        print str(exc_value) + " " + host
        if exc_type == socket.timeout or (exc_type == socket.error and code in [60, 110, 10060]): #timed out, 10060 is for Windows
            if not inWhileList:
                logging.info ("add "+host+" to blocked domains")
                gConfig["BLOCKED_IPS"][connectHost] = True
def proxy(self):
    """Handle one plain-HTTP proxy request (Python 2 code).

    Either answers with a 302 redirect, hands the request to
    ``self.do_METHOD_Tunnel()`` (blocked host/IP, rejected injection, or any
    error), or connects upstream, forwards the request and streams the reply
    back through ``self.wfile``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost -- confirm block boundaries against the
    original file before relying on them.
    """
    doInject = False
    inWhileList = False
    if gOptions.log > 0: print self.requestline
    port = 80
    host = self.headers["Host"]
    # Split an explicit "host:port" Host header.
    if host.find(":") != -1:
        port = int(host.split(":")[1])
        host = host.split(":")[0]

    try:
        # Unwrap redirector URLs: while the URL's host is a configured
        # redirect domain, follow the target carried in its query string.
        redirectUrl = self.path
        while True:
            (scm, netloc, path, params, query, _) = urlparse.urlparse(redirectUrl)
            if gOptions.log > 2: print urlparse.urlparse(redirectUrl)

            if (netloc not in gConfig["REDIRECT_DOMAINS"]):
                break
            prefixes = gConfig["REDIRECT_DOMAINS"][netloc].split('|')
            found = False
            for prefix in prefixes:
                prefix = prefix + "="
                for param in query.split('&') :
                    if param.find(prefix) == 0:
                        print "redirect to " + urllib.unquote(param[len(prefix):])
                        redirectUrl = urllib.unquote(param[len(prefix):])
                        found = True
                        continue
            if not found:
                break

        # self.path[7:] drops the leading "http://".
        if (host in gConfig["HSTS_DOMAINS"]):
            redirectUrl = "https://" + self.path[7:]

        #redirect
        if (redirectUrl != self.path):
            status = "HTTP/1.1 302 Found"
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: " + redirectUrl + "\r\n")
            return

        # Remove http://[host] , for google.com.hk
        path = self.path[self.path.find(netloc) + len(netloc):]

        connectHost = self.getip(host)
        if (host in gConfig["BLOCKED_DOMAINS"]) or isIpBlocked(connectHost):
            gConfig["BLOCKED_DOMAINS"][host] = True
            if gOptions.log>0 : print "add ip "+ connectHost + " to block list"
            return self.do_METHOD_Tunnel()

        if True:
            for d in domainWhiteList:
                if host.endswith(d):
                    if gOptions.log > 0: print host + " in domainWhiteList: " + d
                    inWhileList = True

            if not inWhileList:
                doInject = self.enableInjection(host, connectHost)

            self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            if gOptions.log > 1: print "connect to " + host + ":" + str(port)
            self.remote.connect((connectHost, port))
            if doInject:
                if gOptions.log > 0: print "inject http for "+host
                # "Injection": blank lines sent ahead of the real request.
                self.remote.send("\r\n\r\n")
            # Send requestline
            if path == "":
                path = "/"
            print " ".join((self.command, path, self.request_version)) + "\r\n"
            self.remote.send(" ".join((self.command, path, self.request_version)) + "\r\n")
            # Send headers
            if host[-12:] == ".appspot.com":
                print "add version code " + gConfig["VERSION"] + " in HTTP header"
                self.headers["X-WCProxy"] = gConfig["VERSION"]
                self.headers["X-WCPasswd"] = gConfig["PROXY_PASSWD"]
            self.remote.send(str(self.headers) + "\r\n")
            # Send Post data
            if(self.command=='POST'):
                self.remote.send(self.rfile.read(int(self.headers['Content-Length'])))
            response = HTTPResponse(self.remote, method=self.command)
            badStatusLine = False
            msg = "http405"
            try :
                response.begin()
                print host + " response: %d"%(response.status)
                msg = "http%d"%(response.status)
            except BadStatusLine:
                print host + " response: BadStatusLine"
                msg = "badStatusLine"
                badStatusLine = True
            except:
                raise

            # Injection rejected: whitelist the host and fall back to the tunnel.
            if doInject and (response.status == 400 or response.status == 405 or badStatusLine):
                self.remote.close()
                self.remote = None
                if gOptions.log > 0: print host + " seem not support inject, " + msg
                domainWhiteList.append(host)
                return self.do_METHOD_Tunnel()

        # Reply to the browser
        status = "HTTP/1.1 " + str(response.status) + " " + response.reason
        self.wfile.write(status + "\r\n")
        h = ''
        for hh, vv in response.getheaders():
            if hh.upper()!='TRANSFER-ENCODING':
                h += hh + ': ' + vv + '\r\n'
        self.wfile.write(h + "\r\n")

        dataLength = 0
        while True:
            response_data = response.read(8192)
            if(len(response_data) == 0): break
            # A short first chunk carrying one of these error titles is
            # replaced by the configured reload page.
            if dataLength == 0 and (len(response_data) <= 501):
                if response_data.find("<title>400 Bad Request") != -1 or response_data.find("<title>501 Method Not Implemented") != -1:
                    print host + " not supporting injection"
                    domainWhiteList.append(host)
                    response_data = gConfig["PAGE_RELOAD_HTML"]
            self.wfile.write(response_data)
            dataLength += len(response_data)
        if gOptions.log > 1: print "data length: %d"%dataLength
    except:
        if self.remote:
            self.remote.close()
            self.remote = None

        (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path)
        status = "HTTP/1.1 302 Found"
        if host in gConfig["HSTS_ON_EXCEPTION_DOMAINS"]:
            redirectUrl = "https://" + self.path[7:]
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: " + redirectUrl + "\r\n")

        exc_type, exc_value, exc_traceback = sys.exc_info()
        if exc_type == socket.error:
            # The errno is recovered by parsing str(exc_value).
            code, msg = str(exc_value).split('] ')
            code = code[1:].split(' ')[1]
            if code in ["32", "10053"]: #errno.EPIPE, 10053 is for Windows
                if gOptions.log > 0: print "Detected remote disconnect: " + host
                return
            if code in ["61"]: #server not support injection
                if doInject:
                    print "try not inject " + host
                    domainWhiteList.append(host)
                    self.do_METHOD_Tunnel()
                    return

        print "error in proxy: ", self.requestline
        print exc_type
        print str(exc_value) + " " + host
        if exc_type == socket.timeout or (exc_type == socket.error and code in ["60", "110", "10060"]): #timed out, 10060 is for Windows
            if not inWhileList:
                if gOptions.log > 0: print "add "+host+" to blocked domains"
                gConfig["BLOCKED_DOMAINS"][host] = True
        return self.do_METHOD_Tunnel()
def mitm_response(self, data):
    """Record an intercepted HTTP response in ``self.doc`` and pass it through.

    Status, status line, every header, the content type, cookies and the
    decoded body are copied onto ``self.doc``, which is then saved. Failures
    while extracting the content type or cookies, or while saving, are
    ignored.

    :param data: raw response, starting at the status line.
    :return: ``data`` unchanged.
    """
    first_line = data.split("\r\n")[0]
    r = HTTPResponse(FakeSocket(data))
    r.begin()

    # response line
    self.doc.response.status = r.status
    self.doc.response.responseline = first_line

    # headers
    ct = ""
    cookies = []
    for name, value in r.getheaders():
        self.doc.add_parsed_response_header(name, value)
        if name == "content-type":
            ct = value
        elif name == "set-cookie":
            cookies.append(value)

    # content type (left unset when the pattern does not match)
    try:
        self.doc.response.content_type = reContentType.search(ct).group(1)
    except:
        pass

    # cookies
    # TODO: the following code extracts only partial cookie data - check/rewrite
    for cookie in cookies:
        try:
            pc = SimpleCookie(cookie)
            for name in pc.keys():
                c = pc[name]
                # Attributes that are missing on the cookie are stored as None.
                value = getattr(c, "value", None)
                domain = getattr(c, "domain", None)
                path = getattr(c, "path", None)
                exp = getattr(c, "expires", None)
                self.doc.add_response_cookie(name, value, domain, path, exp)
        except:
            pass

    # body
    bodybytes = r.read()
    self.doc.response.body = bodybytes.decode(args.charset, args.encodingerrors)

    try:
        self.doc.save(storeResponseBody)
    except:
        pass
    return data
def proxy(self):
    """Handle one plain-HTTP proxy request.

    Requests whose Host header is ``127.0.0.1`` or ``localhost`` are answered
    locally: ``/save`` updates one ``gConfig`` entry, ``/add`` registers a
    blocked domain, and anything else renders ``index.html`` with every
    ``{key}`` placeholder replaced by the matching ``gConfig`` value.

    Every other request is forwarded to the resolved host (through a
    ``socks.socksocket`` when the domain or IP is reported as blocked) and the
    response is streamed back to the client without the Transfer-Encoding
    header.  On a connection reset of a direct connection the IP is added to
    ``gConfig["BLOCKED_IPS"]`` and the request is retried once via ``proxy()``.
    """
    doProxy = False
    logging.info (self.requestline)
    port = 80
    host = self.headers["Host"]
    if ":" in host:
        hostParts = host.split(":")
        port = int(hostParts[1])
        host = hostParts[0]
    parsedUrl = urlparse.urlparse(self.path)
    netloc, path = parsedUrl[1], parsedUrl[2]

    if host in ["127.0.0.1", "localhost"]:
        # NOTE(review): the Host header is client-controlled, so any client can
        # reach these configuration endpoints -- confirm this is intended.
        basedir = os.path.dirname(__file__)
        htmlTemplate = os.path.join(basedir, "index.html")
        with open(htmlTemplate) as htmlFile:
            html = htmlFile.read()
        status = "HTTP/1.1 200 OK"
        if path == "/save":
            postData = self.rfile.read(int(self.headers['Content-Length']))
            data = urlparse.parse_qs(postData)
            logging.info(str(data))
            key = data["id"][0]
            value = data["value"][0]
            if key in gConfig:
                if isinstance(gConfig[key], bool):
                    if value == "true":
                        gConfig[key] = True
                    if value == "false":
                        gConfig[key] = False
                else:
                    # coerce the posted text to the type of the current value
                    gConfig[key] = type(gConfig[key])(value)
            self.wfile.write(status + "\r\n\r\n" + value)
            return
        if path == "/add":
            postData = self.rfile.read(int(self.headers['Content-Length']))
            data = urlparse.parse_qs(postData)
            domain = ""  # stays empty when the form field is missing
            if "BLOCKED_DOMAINS" in data:
                domain = data["BLOCKED_DOMAINS"][0]
                if domain[:4] == "http":
                    # a full URL was posted; keep only its network location
                    domain = urlparse.urlparse(domain)[1]
                gConfig["BLOCKED_DOMAINS"][domain] = True
            self.wfile.write("HTTP/1.1 302 FOUND\r\n" + "Location: /\r\n\r\n" + domain)
            return
        #TODO: pac
        for key in gConfig:
            html = html.replace("{" + key + "}", str(gConfig[key]))
        self.wfile.write(status + "\r\n\r\n" + html)
        return

    try:
        if gConfig["ADSHOSTON"] and host in gConfig["ADSHOST"]:
            status = "HTTP/1.1 404 Not Found"
            self.wfile.write(status + "\r\n\r\n")
            return

        # Remove http://[host] , for google.com.hk
        path = self.path[self.path.find(netloc) + len(netloc):]
        connectHost = self.getip(host)
        logging.info ("Resolved " + host + " => " + connectHost)

        if isDomainBlocked(host) or isIpBlocked(connectHost):
            self.remote = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
            logging.info("connect to " + host + ":" + str(port) + " var socks5 proxy")
            self.remote.connect((connectHost, port))
            doProxy = True
        else:
            self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            logging.debug( "connect to " + host + ":" + str(port))
            self.remote.connect((connectHost, port))

        # Send requestline
        if path == "":
            path = "/"
        requestLine = " ".join((self.command, path, self.request_version)) + "\r\n"
        print (requestLine)
        # sendall: a plain send() may transmit only part of the buffer
        self.remote.sendall(requestLine)
        self.remote.sendall(str(self.headers) + "\r\n")
        # Send Post data
        if self.command == 'POST':
            self.remote.sendall(self.rfile.read(int(self.headers['Content-Length'])))

        response = HTTPResponse(self.remote, method=self.command)
        response.begin()
        print (host + " response: %d"%(response.status))

        # Reply to the browser
        status = "HTTP/1.1 " + str(response.status) + " " + response.reason
        self.wfile.write(status + "\r\n")
        # the body is relayed as read by HTTPResponse, so drop Transfer-Encoding
        headerText = "".join(
            hh + ': ' + vv + '\r\n'
            for hh, vv in response.getheaders()
            if hh.upper() != 'TRANSFER-ENCODING'
        )
        self.wfile.write(headerText + "\r\n")

        dataLength = 0
        while True:
            response_data = response.read(8192)
            if len(response_data) == 0:
                break
            # only a short first chunk is inspected for the server error pages
            if dataLength == 0 and len(response_data) <= 501:
                if ("<title>400 Bad Request" in response_data
                        or "<title>501 Method Not Implemented" in response_data):
                    logging.error( host + " not supporting injection")
                    domainWhiteList.append(host)
                    response_data = gConfig["PAGE_RELOAD_HTML"]
            self.wfile.write(response_data)
            dataLength += len(response_data)
        logging.debug( "data length: %d"%dataLength)
    except Exception:
        if self.remote:
            self.remote.close()
            self.remote = None

        exc_type, exc_value, exc_traceback = sys.exc_info()
        if exc_type == socket.error:
            code = exc_value.args[0] if exc_value.args else None
            if code in (errno.EPIPE, 10053): #errno.EPIPE, 10053 is for Windows
                logging.info ("Detected remote disconnect: " + host)
                return
            if code == errno.ECONNREFUSED:
                logging.info ("Detected ECONNREFUSED: " + host)
                return
            # errno.ECONNRESET is an int: compare by value (10054 is for Windows)
            if code in (errno.ECONNRESET, 10054): #reset
                logging.info(host + ": reset from " + connectHost)
                if not doProxy:
                    # remember the IP so the retry goes through the socks proxy
                    gConfig["BLOCKED_IPS"][connectHost] = True
                    return self.proxy()

        print ("error in proxy: ", self.requestline)
        print (exc_type)
        print (str(exc_value) + " " + host)
self._proxy_sock.sendall(self.mitm_request(req)) # Parse response h = HTTPResponse(self._proxy_sock) h.begin() # Get rid of the pesky header del h.msg['Transfer-Encoding'] # Time to relay the message across res = '%s %s %s\r\n' % (self.request_version, h.status, h.reason) res += '%s\r\n' % h.msg content_received = h.read() res += content_received #log the size logger_4.info('OBJECT : ' + str(len(res)) ) try: HTTPObject_received = controller.HTTPObject(h.getheaders(), url_requested , content_received, h.status, h.reason, self.request_version, webpage, phase, self.rtt) # TODO replace the 100 with RTT controller.createObject(HTTPObject_received) except Exception,e: print str(e) self.request.sendall(self.mitm_response(res)) except SocketError as e: if e.errno != errno.ECONNRESET: raise pass # Let's close off the remote end if h != None: h.close() self._proxy_sock.close()
def proxy(self): doInject = False try: print self.requestline port = 80 host = self.headers["Host"] if host.find(":") != -1: port = int(host.split(":")[1]) host = host.split(":")[0] redirectUrl = self.path while True: (scm, netloc, path, params, query, _) = urlparse.urlparse(redirectUrl) if (netloc not in gConfig["REDIRECT_DOMAINS"]): break prefixes = gConfig["REDIRECT_DOMAINS"][netloc].split('|') found = False for prefix in prefixes: prefix = prefix + "=" for param in query.split('&') : if param.find(prefix) == 0: print "redirect to " + urllib.unquote(param[len(prefix):]) redirectUrl = urllib.unquote(param[len(prefix):]) found = True continue if not found: break #redirect if (redirectUrl != self.path): status = "HTTP/1.1 302 Found" self.wfile.write(status + "\r\n") self.wfile.write("Location: " + redirectUrl + "\r\n") self.connection.close() return # Remove http://[host] path = self.path[self.path.find(netloc) + len(netloc):] connectHost = self.getip(host) self.lastHost = self.headers["Host"] while True: doInject = self.enableInjection(host, connectHost) if self.remote is None or self.lastHost != self.headers["Host"]: self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM) print "connect to " + host + ":" + str(port) self.remote.connect((connectHost, port)) if doInject: self.remote.send("\r\n\r\n") # Send requestline self.remote.send(" ".join((self.command, (doInject and [path] or [self.path])[0], self.request_version)) + "\r\n") # Send headers self.remote.send(str(self.headers) + "\r\n") # Send Post data if(self.command=='POST'): self.remote.send(self.rfile.read(int(self.headers['Content-Length']))) response = HTTPResponse(self.remote, method=self.command) badStatusLine = False msg = "http405" try : response.begin() print host + " response: %d"%(response.status) msg = "http%d"%(response.status) except BadStatusLine: print host + " response: BadStatusLine" msg = "badStatusLine" badStatusLine = True if doInject and (response.status == 400 or response.status 
== 405 or badStatusLine): self.remote.close() self.remote = None domainWhiteList.append(host) self.netlog(msg + "/host/" + host) continue break # Reply to the browser status = "HTTP/1.1 " + str(response.status) + " " + response.reason self.wfile.write(status + "\r\n") h = '' for hh, vv in response.getheaders(): if hh.upper()!='TRANSFER-ENCODING': h += hh + ': ' + vv + '\r\n' self.wfile.write(h + "\r\n") while True: response_data = response.read(8192) if(len(response_data) == 0): break self.wfile.write(response_data) except: if self.remote: self.remote.close() self.remote = None exc_type, exc_value, exc_traceback = sys.exc_info() print "error in proxy: ", self.requestline print exc_type print str(exc_value) + " " + host path = "" if exc_type == socket.error: code, msg = str(exc_value).split('] ') code = code[1:].replace(" ", "") path = code + "/host/" + host + "/?msg=" + urllib.quote(msg) traceback.print_tb(exc_traceback) (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path) status = "HTTP/1.1 302 Found" if (netloc != urlparse.urlparse( gConfig["PROXY_SERVER"] )[1]): self.wfile.write(status + "\r\n") self.wfile.write("Location: " + gConfig["PROXY_SERVER"] + self.path[7:] + "\r\n") else: status = "HTTP/1.1 302 Found" if (scm.upper() != "HTTP"): msg = "schme-not-supported" else: msg = "web-proxy-fail" path = ("error/host/" + host + "/?msg=" + msg) self.wfile.write(status + "\r\n") self.wfile.write("Location: http://liruqi.info/post/18486575704/west-chamber-proxy#" + msg + "\r\n") self.netlog(path) self.connection.close()
def proxy(self): doProxy = False inWhileList = False logging.info (self.requestline) port = 80 host = self.headers["Host"] if host.find(":") != -1: port = int(host.split(":")[1]) host = host.split(":")[0] (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path) if host in ["127.0.0.1", "localhost"]: basedir = os.path.dirname(__file__) htmlTemplate = os.path.join(basedir, "index.html") htmlFile = open(htmlTemplate) html = htmlFile.read() htmlFile.close() status = "HTTP/1.1 200 OK" if path == "/save": postData = self.rfile.read(int(self.headers['Content-Length'])) data = urlparse.parse_qs(postData) logging.info(str(data)) key = data["id"][0] value = data["value"][0] if key in gConfig: if type(gConfig[key]) == type(True): if value == "true": gConfig[key] = True if value == "false": gConfig[key] = False else: gConfig[key] = type(gConfig[key]) (value) self.wfile.write(status + "\r\n\r\n" + value) return if path == "/add": postData = self.rfile.read(int(self.headers['Content-Length'])) data = urlparse.parse_qs(postData) if "BLOCKED_DOMAINS" in data: domain = data["BLOCKED_DOMAINS"][0] if domain[:4] == "http": (scm, netloc, path, params, query, _) = urlparse.urlparse(domain) domain = netloc gConfig["BLOCKED_DOMAINS"][domain] = True self.wfile.write("HTTP/1.1 302 FOUND\r\n" + "Location: /\r\n\r\n" + domain) return #TODO: pac for key in gConfig: if type(gConfig[key]) in [str,int] : html = html.replace("{"+key+"}", str(gConfig[key])) else : html = html.replace("{" + key + "}", str(gConfig[key])) self.wfile.write(status + "\r\n\r\n" + html) return try: if (gConfig["ADSHOSTON"] and host in gConfig["ADSHOST"]): status = "HTTP/1.1 404 Not Found" self.wfile.write(status + "\r\n\r\n") return # Remove http://[host] , for google.com.hk path = self.path[self.path.find(netloc) + len(netloc):] connectHost = self.getip(host) logging.info ("Resolved " + host + " => " + connectHost) if isDomainBlocked(host) or isIpBlocked(connectHost): self.remote = 
socks.socksocket(socket.AF_INET, socket.SOCK_STREAM) logging.info("connect to " + host + ":" + str(port) + " var socks5 proxy") self.remote.connect((connectHost, port)) doProxy = True else: self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM) logging.debug( "connect to " + host + ":" + str(port)) self.remote.connect((connectHost, port)) # Send requestline if path == "": path = "/" print " ".join((self.command, path, self.request_version)) + "\r\n" self.remote.send(" ".join((self.command, path, self.request_version)) + "\r\n") self.remote.send(str(self.headers) + "\r\n") # Send Post data if(self.command=='POST'): self.remote.send(self.rfile.read(int(self.headers['Content-Length']))) response = HTTPResponse(self.remote, method=self.command) response.begin() print host + " response: %d"%(response.status) # Reply to the browser status = "HTTP/1.1 " + str(response.status) + " " + response.reason self.wfile.write(status + "\r\n") h = '' for hh, vv in response.getheaders(): if hh.upper()!='TRANSFER-ENCODING': h += hh + ': ' + vv + '\r\n' self.wfile.write(h + "\r\n") dataLength = 0 while True: response_data = response.read(8192) if(len(response_data) == 0): break if dataLength == 0 and (len(response_data) <= 501): if response_data.find("<title>400 Bad Request") != -1 or response_data.find("<title>501 Method Not Implemented") != -1: logging.error( host + " not supporting injection") domainWhiteList.append(host) response_data = gConfig["PAGE_RELOAD_HTML"] self.wfile.write(response_data) dataLength += len(response_data) logging.debug( "data length: %d"%dataLength) except: if self.remote: self.remote.close() self.remote = None (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path) status = "HTTP/1.1 302 Found" exc_type, exc_value, exc_traceback = sys.exc_info() if exc_type == socket.error: code = exc_value[0] if code in [32, 10053, errno.EPIPE]: #errno.EPIPE, 10053 is for Windows logging.info ("Detected remote disconnect: " + host) return if code == 
errno.ECONNREFUSED: logging.info ("Detected ECONNREFUSED: " + host) return if code in [54, 10054]: #reset logging.info(host + ": reset from " + connectHost) if not doProxy: gConfig["BLOCKED_IPS"][connectHost] = True return self.proxy() print "error in proxy: ", self.requestline print exc_type print str(exc_value) + " " + host
def main(): """ Begin Main.... """ now = datetime.now() print "Starting: " + str(now) RMC = RemoteMongoConnector.RemoteMongoConnector() zones_collection = RMC.get_zone_connection() cc_collection = RMC.get_common_crawl_connection() zone_results = zones_collection.find( {'status': { "$nin": ["false_positive", "expired"] }}) zones = [] for rec in zone_results: if rec['zone'].find("."): zones.append(rec['zone'].encode('UTF-8')) for zone in zones: print "Zone: " + zone result = get_cc_files(zone) if result != None: for file in os.listdir(OUT_DIR): if file.endswith(".gz"): print os.path.join(OUT_DIR, file) warc_f = warc.open(OUT_DIR + file) fsize = os.path.getsize(OUT_DIR + file) while fsize > warc_f.tell(): for _, record in enumerate(warc_f): if ('warc-target-uri' in record.header and record.header['warc-target-uri'].find(zone) > 0): print record.url if record.type == "response": print record.url insert_json = {} insert_json['zone'] = zone insert_json['url'] = record.url obj = urlparse(record.url) insert_json['domain'] = obj.netloc source = FakeSocket(record.payload.read()) response = HTTPResponse(source) response.begin() insert_json[ 'headers'] = response.getheaders() insert_json['status'] = response.status insert_json['version'] = response.version insert_json['reason'] = response.reason insert_json['created'] = datetime.now() cc_collection.insert(insert_json) files = glob.glob(OUT_DIR + "*") for file in files: os.remove(file) now = datetime.now() print "Ending: " + str(now)
self._proxy_sock.sendall(self.mitm_request(req)) # Parse response h = HTTPResponse(self._proxy_sock) h.begin() # Get rid of the pesky header del h.msg['Transfer-Encoding'] # Time to relay the message across res = '%s %s %s\r\n' % (self.request_version, h.status, h.reason) res += '%s\r\n' % h.msg content_received = h.read() res += content_received HTTPObject_received = controller.HTTPObject(h.getheaders(), url_requested , content_received, h.status) controller.createObject(HTTPObject_received) # Relay the message self.request.sendall(self.mitm_response(res)) except SocketError as e: if e.errno != errno.ECONNRESET: raise pass # Let's close off the remote end if h != None: h.close() self._proxy_sock.close()
def mitm_response(self, data):
    """Record an intercepted HTTP response in ``self.doc`` and pass it through.

    The raw response is re-parsed with ``HTTPResponse`` over a ``FakeSocket``;
    status, response line, headers, content type, cookies and body are decoded
    with ``args.charset`` / ``args.encodingerrors`` and stored on ``self.doc``,
    which is then saved.

    :param data: the complete raw HTTP response.
    :returns: ``data`` unchanged.
    """
    lines = data.split("\r\n")
    r = HTTPResponse(FakeSocket(data))
    r.begin()

    # response line
    self.doc.response.status = r.status
    self.doc.response.responseline = lines[0].decode(args.charset, args.encodingerrors)

    # headers
    ct = ""
    cookies = []
    for rawName, rawValue in r.getheaders():
        name = rawName.decode(args.charset, args.encodingerrors)
        value = rawValue.decode(args.charset, args.encodingerrors)
        self.doc.add_parsed_response_header(name, value)
        # HTTP header names are case-insensitive; do not rely on the parser
        # having lower-cased them.
        lowered = name.lower()
        if lowered == "content-type":
            ct = value
        elif lowered == "set-cookie":
            cookies.append(value)

    # content type
    try:
        self.doc.response.content_type = reContentType.search(ct).group(1)
    except (AttributeError, IndexError):
        # search() found nothing (returned None) or the pattern has no group 1
        pass

    # cookies
    for cookie in cookies:
        try:
            pc = SimpleCookie(cookie)
            for name in pc.keys():
                c = pc[name]
                # domain/path/expires are dictionary keys of a Morsel, not
                # attributes; an unset key holds "" and is reported as None.
                self.doc.add_response_cookie(
                    name,
                    c.value,
                    c["domain"] or None,
                    c["path"] or None,
                    c["expires"] or None,
                )
        except Exception:
            # deliberate best-effort: an unparsable cookie must not abort the relay
            pass

    # body
    bodybytes = r.read()
    self.doc.response.body = bodybytes.decode(args.charset, args.encodingerrors)
    self.doc.save(storeResponseBody)
    return data
def proxy(self): inWhileList = False logging.info(self.requestline) port = 80 host = self.headers["Host"] if host.find(":") != -1: port = int(host.split(":")[1]) host = host.split(":")[0] (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path) if host in ["127.0.0.1", "localhost"]: basedir = os.path.dirname(__file__) htmlTemplate = os.path.join(basedir, "index.html") htmlFile = open(htmlTemplate) html = htmlFile.read() htmlFile.close() status = "HTTP/1.1 200 OK" if path == "/save": postData = self.rfile.read(int(self.headers['Content-Length'])) data = urlparse.parse_qs(postData) logging.info(str(data)) key = data["id"][0] value = data["value"][0] if key in gConfig: if type(gConfig[key]) == type(True): if value == "true": gConfig[key] = True if value == "false": gConfig[key] = False else: gConfig[key] = type(gConfig[key])(value) self.wfile.write(status + "\r\n\r\n" + value) return if path == "/add": postData = self.rfile.read(int(self.headers['Content-Length'])) data = urlparse.parse_qs(postData) if "BLOCKED_DOMAINS" in data: domain = data["BLOCKED_DOMAINS"][0] if domain[:4] == "http": (scm, netloc, path, params, query, _) = urlparse.urlparse(domain) domain = netloc gConfig["BLOCKED_DOMAINS"][domain] = True self.wfile.write("HTTP/1.1 302 FOUND\r\n" + "Location: /\r\n\r\n" + domain) return for key in gConfig: if type(gConfig[key]) in [str, int]: html = html.replace("{" + key + "}", str(gConfig[key])) else: html = html.replace("{" + key + "}", str(gConfig[key])) self.wfile.write(status + "\r\n\r\n" + html) return try: if (host in gConfig["HSTS_DOMAINS"]): redirectUrl = "https://" + self.path[7:] #redirect status = "HTTP/1.1 302 Found" self.wfile.write(status + "\r\n") self.wfile.write("Location: " + redirectUrl + "\r\n\r\n") return for d in domainWhiteList: if host.endswith(d): logging.info(host + " in domainWhiteList: " + d) inWhileList = True connectHost = self.getip(host) # Remove http://[host] , for google.com.hk path = 
self.path[self.path.find(netloc) + len(netloc):] if path == "": path = "/" if isDomainBlocked(host) or isIpBlocked(connectHost): if gConfig['PROXY_TYPE'] == 'socks5': self.remote = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM) else: self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM) connectHost = gConfig['HTTP_PROXY'] port = gConfig['HTTP_PROXY_PORT'] path = self.path else: self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM) logging.debug(host + ":connect to " + connectHost + ":" + str(port)) self.remote.connect((connectHost, port)) print " ".join( (self.command, self.path, self.request_version)) + "\r\n" self.remote.send(" ".join((self.command, path, self.request_version)) + "\r\n" + str(self.headers) + "\r\n") # Send Post data if (self.command == 'POST'): self.remote.send( self.rfile.read(int(self.headers['Content-Length']))) response = HTTPResponse(self.remote, method=self.command) badStatusLine = False try: response.begin() print host + " response: %d" % (response.status) except: self.remote.close() self.remote = None raise # Reply to the browser status = "HTTP/1.1 " + str(response.status) + " " + response.reason self.wfile.write(status + "\r\n") h = '' for hh, vv in response.getheaders(): if hh.upper() != 'TRANSFER-ENCODING': h += hh + ': ' + vv + '\r\n' self.wfile.write(h + "\r\n") dataLength = 0 while True: response_data = response.read(8192) if (len(response_data) == 0): break self.wfile.write(response_data) dataLength += len(response_data) logging.debug("data length: %d" % dataLength) self.remote.close() self.remote = None except: if self.remote: self.remote.close() self.remote = None (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path) status = "HTTP/1.1 302 Found" if host in gConfig["HSTS_ON_EXCEPTION_DOMAINS"]: redirectUrl = "https://" + self.path[7:] self.wfile.write(status + "\r\n") self.wfile.write("Location: " + redirectUrl + "\r\n") exc_type, exc_value, exc_traceback = sys.exc_info() if exc_type == 
socket.error: code = exc_value[0] if code in [32, 10053]: #errno.EPIPE, 10053 is for Windows logging.info("Detected remote disconnect: " + host) return if code in [54, 10054]: #reset logging.info(host + ": reset from " + connectHost) gConfig["BLOCKED_IPS"][connectHost] = True return print "error in proxy: ", self.requestline, exc_type print str(exc_value) + " " + host if exc_type == socket.timeout or ( exc_type == socket.error and code in [60, 110, 10060]): #timed out, 10060 is for Windows if not inWhileList: logging.info("add " + host + " to blocked domains") gConfig["BLOCKED_IPS"][connectHost] = True
def proxy(self): doInject = False inWhileList = False logging.info(self.requestline) port = 80 host = self.headers["Host"] if host.find(":") != -1: port = int(host.split(":")[1]) host = host.split(":")[0] (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path) if host in ["127.0.0.1", "localhost"]: basedir = os.path.dirname(__file__) htmlTemplate = os.path.join(basedir, "index.html") htmlFile = open(htmlTemplate) html = htmlFile.read() htmlFile.close() status = "HTTP/1.1 200 OK" if path == "/save": postData = self.rfile.read(int(self.headers['Content-Length'])) data = urlparse.parse_qs(postData) logging.info(str(data)) key = data["id"][0] value = data["value"][0] if key in gConfig: if type(gConfig[key]) == type(True): if value == "true": gConfig[key] = True if value == "false": gConfig[key] = False else: gConfig[key] = type(gConfig[key])(value) hookInit() self.wfile.write(status + "\r\n\r\n" + value) return if path == "/add": postData = self.rfile.read(int(self.headers['Content-Length'])) data = urlparse.parse_qs(postData) if "BLOCKED_DOMAINS" in data: domain = data["BLOCKED_DOMAINS"][0] if domain[:4] == "http": (scm, netloc, path, params, query, _) = urlparse.urlparse(domain) domain = netloc gConfig["BLOCKED_DOMAINS"][domain] = True self.wfile.write("HTTP/1.1 302 FOUND\r\n" + "Location: /\r\n\r\n" + domain) return for key in gConfig: if type(gConfig[key]) in [str, int]: html = html.replace("{" + key + "}", str(gConfig[key])) else: html = html.replace("{" + key + "}", str(gConfig[key])) self.wfile.write(status + "\r\n\r\n" + html) return try: if (host in gConfig["HSTS_DOMAINS"]): redirectUrl = "https://" + self.path[7:] #redirect status = "HTTP/1.1 302 Found" self.wfile.write(status + "\r\n") self.wfile.write("Location: " + redirectUrl + "\r\n\r\n") return if (gConfig["ADSHOSTON"] and host in gConfig["ADSHOST"]): status = "HTTP/1.1 404 Not Found" self.wfile.write(status + "\r\n\r\n") return # Remove http://[host] , for google.com.hk path = 
self.path[self.path.find(netloc) + len(netloc):] for d in domainWhiteList: if host.endswith(d): logging.info(host + " in domainWhiteList: " + d) inWhileList = True connectHost = self.getip(host) if not inWhileList: doInject = self.enableInjection(host, connectHost) logging.info("Resolved " + host + " => " + connectHost) if isDomainBlocked(host) or isIpBlocked(connectHost): if gConfig["PROXY_TYPE"] == "socks5": self.remote = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM) logging.info("connect to " + host + ":" + str(port) + " var socks5 proxy") self.remote.connect((connectHost, port)) else: logging.info(host + " blocked, try goagent.") return self.do_METHOD_Tunnel() else: self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM) logging.debug("connect to " + host + ":" + str(port)) self.remote.connect((connectHost, port)) if doInject: logging.info("inject http for " + host) self.remote.send("\r\n\r\n") # Send requestline if path == "": path = "/" print " ".join((self.command, path, self.request_version)) + "\r\n" self.remote.send(" ".join((self.command, path, self.request_version)) + "\r\n") self.remote.send(str(self.headers) + "\r\n") # Send Post data if (self.command == 'POST'): self.remote.send( self.rfile.read(int(self.headers['Content-Length']))) response = HTTPResponse(self.remote, method=self.command) badStatusLine = False msg = "http405" try: response.begin() print host + " response: %d" % (response.status) msg = "http%d" % (response.status) except BadStatusLine: print host + " response: BadStatusLine" msg = "badStatusLine" badStatusLine = True except: raise if doInject and (response.status == 400 or response.status == 405 or badStatusLine): self.remote.close() self.remote = None logging.info(host + " seem not support inject, " + msg) return self.do_METHOD_Tunnel() # Reply to the browser status = "HTTP/1.1 " + str(response.status) + " " + response.reason self.wfile.write(status + "\r\n") h = '' for hh, vv in response.getheaders(): if hh.upper() 
!= 'TRANSFER-ENCODING': h += hh + ': ' + vv + '\r\n' self.wfile.write(h + "\r\n") dataLength = 0 while True: response_data = response.read(8192) if (len(response_data) == 0): break if dataLength == 0 and (len(response_data) <= 501): if response_data.find( "<title>400 Bad Request" ) != -1 or response_data.find( "<title>501 Method Not Implemented") != -1: logging.error(host + " not supporting injection") domainWhiteList.append(host) response_data = gConfig["PAGE_RELOAD_HTML"] self.wfile.write(response_data) dataLength += len(response_data) logging.debug("data length: %d" % dataLength) except: if self.remote: self.remote.close() self.remote = None (scm, netloc, path, params, query, _) = urlparse.urlparse(self.path) status = "HTTP/1.1 302 Found" if host in gConfig["HSTS_ON_EXCEPTION_DOMAINS"]: redirectUrl = "https://" + self.path[7:] self.wfile.write(status + "\r\n") self.wfile.write("Location: " + redirectUrl + "\r\n") exc_type, exc_value, exc_traceback = sys.exc_info() if exc_type == socket.error: code, msg = str(exc_value).split('] ') code = code[1:].split(' ')[1] if code in ["32", "10053"]: #errno.EPIPE, 10053 is for Windows logging.info("Detected remote disconnect: " + host) return if code in ["54"]: return self.do_METHOD_Tunnel() if code in ["61"]: #server not support injection if doInject: logging.info("try not inject " + host) domainWhiteList.append(host) return self.do_METHOD_Tunnel() print "error in proxy: ", self.requestline print exc_type print str(exc_value) + " " + host if exc_type == socket.timeout or ( exc_type == socket.error and code in ["60", "110", "10060" ]): #timed out, 10060 is for Windows if not inWhileList: logging.info("add " + host + " to blocked domains") gConfig["BLOCKED_DOMAINS"][host] = True return self.do_METHOD_Tunnel()
def do_COMMAND(self): global allCache global dumpToDisk cacheHit = False sendNotModifiesResponse = False cacheurl = self.path print cacheurl # checking if we have a cache hit if not ('?' in cacheurl) and (cacheurl in allCache): #in all cache array add two more fields max-age and date,time when added, and check if the object is valid before #making cachehit true cacheHit = True #------ code below uncomment # if 'If-Modified-Since' in self.headers: # last_modified_date = self.headers['If-Modified-Since'] # formatter_string = "%a, %d %b %Y %H:%M:%S %Z" # in_req_modified_date = datetime.strptime(last_modified_date, formatter_string) # #get the other date from cache # in_cache_modified_date = allCache[cacheurl][3] # if in_req_modified_date > in_cache_modified_date: # #can be served from cache sendNotModifiesResponse = False # No cache hit, fetch from Internet if not cacheHit: # Is this an SSL tunnel? if not self.is_connect: try: # Connect to destination self._connect_to_host() except Exception, e: self.send_error(500, str(e)) return # Extract path # Build request req = '%s %s %s\r\n' % (self.command, self.path, self.request_version) # Add headers to the request req += '%s\r\n' % self.headers # Append message body if present to the request if 'Content-Length' in self.headers: req += self.rfile.read(int(self.headers['Content-Length'])) # Send it down the pipe! self._proxy_sock.sendall(self.mitm_request(req)) # Parse response h = HTTPResponse(self._proxy_sock) h.begin() # Get rid of the pesky header del h.msg['Transfer-Encoding'] # Time to relay the message across res = '%s %s %s\r\n' % (self.request_version, h.status, h.reason) res += '%s\r\n' % h.msg toWrite=h.read() res += toWrite # fetched an object from the Internet, need to add it to memory and dump it to disk if not ("?" 
in cacheurl): fileName=cacheFileName() tempObject=CacheObject(cacheurl, h.getheaders(), h.status, h.reason, toWrite,fileName) # current_datetime = datetime.now() # allCache[cacheurl]=['memory', tempObject, fileName, current_datetime] dumpToDisk.append(tempObject) h.close() self._proxy_sock.close()
# Send it down the pipe! self._proxy_sock.sendall(self.mitm_request(req)) # Parse response h = HTTPResponse(self._proxy_sock) h.begin() # Get rid of the pesky header del h.msg['Transfer-Encoding'] # Time to relay the message across res = '%s %s %s\r\n' % (self.request_version, h.status, h.reason) res += '%s\r\n' % h.msg content_received = h.read() res += content_received #log the size try: http_obj = recorder.http_object(h.getheaders(), url_requested, content_received, h.status, h.reason, self.request_version, webpage, self.rtt) recorder.save_obj(http_obj) except Exception, e: print str(e) self.request.sendall(self.mitm_response(res)) except SocketError as e: if e.errno != errno.ECONNRESET: raise pass # Let's close off the remote end if h != None:
for k in self.headers: req_msg.add_header(k, self.headers[k]) # Send it down the pipe! self._proxy_sock.sendall(self.mitm_request(req_msg)) # Parse response h = HTTPResponse(self._proxy_sock) h.begin() # Get rid of the pesky header del h.msg['Transfer-Encoding'] # Time to relay the message across headline = '%s %s %s' % (self.request_version, h.status, h.reason) rsp_msg = HttpMsg(headline, h.read()) for tup in h.getheaders(): k, v = tup rsp_msg.add_header(k, v) # Let's close off the remote end h.close() self._proxy_sock.close() # Relay the message self.request.sendall(self.mitm_response(rsp_msg)) def mitm_request(self, msg): for p in self.server._req_plugins: data = p(self.server, self).do_request(msg) result = msg.headline + '\r\n'
def proxy(self):
    """Relay one plain-HTTP request, optionally with a CRLF "injection" prefix.

    Steps, in order:

    1. Requests whose ``Host`` is ``127.0.0.1`` or ``localhost`` are answered
       locally: ``index.html`` (located next to this module) is returned with
       every ``{key}`` placeholder replaced by the matching ``gConfig`` value.
       A request for ``/save`` instead updates one ``gConfig`` entry from the
       posted ``id``/``value`` pair and echoes the value back.
    2. Hosts listed in ``gConfig["HSTS_DOMAINS"]`` receive a 302 redirect to
       the ``https://`` form of the URL; hosts listed in ``gConfig["ADSHOST"]``
       receive a 404 when ``gConfig["ADSHOSTON"]`` is truthy.
    3. Otherwise the host is resolved with ``self.getip``, a connection is
       opened (through SOCKS5 for blocked hosts when configured, directly
       otherwise), the request is forwarded and the response is streamed to
       ``self.wfile`` in 8192-byte reads.
    4. On any exception the upstream socket is dropped and the request falls
       back to ``self.do_METHOD_Tunnel()``, except for a detected client
       disconnect, which just returns.

    Returns whatever ``self.do_METHOD_Tunnel()`` returns on the fallback
    paths, otherwise ``None``.

    NOTE(review): nesting of ``hookInit()`` and of the injection prefix
    (inside the direct-connection branch) was reconstructed from statement
    order; verify against the original layout.
    """
    doInject = False
    inWhileList = False
    logging.info(self.requestline)

    # Split an optional ":port" suffix off the Host header.
    port = 80
    host = self.headers["Host"]
    if host.find(":") != -1:
        port = int(host.split(":")[1])
        host = host.split(":")[0]
    parsed_url = urlparse.urlparse(self.path)
    netloc, path = parsed_url[1], parsed_url[2]

    if host in ["127.0.0.1", "localhost"]:
        # Built-in configuration page.
        basedir = os.path.dirname(__file__)
        htmlTemplate = os.path.join(basedir, "index.html")
        # "with" guarantees the template is closed even if read() fails.
        with open(htmlTemplate) as htmlFile:
            html = htmlFile.read()
        status = "HTTP/1.1 200 OK"
        if path == "/save":
            postData = self.rfile.read(int(self.headers['Content-Length']))
            data = urlparse.parse_qs(postData)
            logging.info(str(data))
            key = data["id"][0]
            value = data["value"][0]
            if key in gConfig:
                if isinstance(gConfig[key], bool):
                    # Booleans arrive as the literal strings "true"/"false";
                    # any other text leaves the setting untouched.
                    if value == "true":
                        gConfig[key] = True
                    if value == "false":
                        gConfig[key] = False
                else:
                    # Coerce the posted text to the type of the current value.
                    gConfig[key] = type(gConfig[key])(value)
                hookInit()
            self.wfile.write(status + "\r\n\r\n" + value)
            return
        for key in gConfig:
            html = html.replace("{" + key + "}", str(gConfig[key]))
        self.wfile.write(status + "\r\n\r\n" + html)
        return

    try:
        if host in gConfig["HSTS_DOMAINS"]:
            # Redirect to HTTPS; [7:] skips the first seven characters of the
            # request URL (the length of "http://").
            redirectUrl = "https://" + self.path[7:]
            status = "HTTP/1.1 302 Found"
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: " + redirectUrl + "\r\n\r\n")
            return

        if gConfig["ADSHOSTON"] and host in gConfig["ADSHOST"]:
            status = "HTTP/1.1 404 Not Found"
            self.wfile.write(status + "\r\n\r\n")
            return

        # Remove http://[host] , for google.com.hk
        path = self.path[self.path.find(netloc) + len(netloc):]
        connectHost = self.getip(host)
        logging.info("Resolved " + host + " => " + connectHost)

        # Whitelisted domains are never injected.
        for d in domainWhiteList:
            if host.endswith(d):
                logging.info(host + " in domainWhiteList: " + d)
                inWhileList = True
        if not inWhileList:
            doInject = self.enableInjection(host, connectHost)

        if isDomainBlocked(host) or isIpBlocked(connectHost):
            if gConfig["PROXY_TYPE"] == "socks5":
                self.remote = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
                logging.info("connect to " + host + ":" + str(port) + " var socks5 proxy")
                self.remote.connect((connectHost, port))
            else:
                logging.info(host + " blocked, try goagent.")
                return self.do_METHOD_Tunnel()
        else:
            self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            logging.debug("connect to " + host + ":" + str(port))
            self.remote.connect((connectHost, port))
            if doInject:
                # The "injection": a bare blank line sent ahead of the request.
                logging.info("inject http for " + host)
                self.remote.send("\r\n\r\n")

        # Send requestline
        if path == "":
            path = "/"
        request_line = " ".join((self.command, path, self.request_version)) + "\r\n"
        print(request_line)
        self.remote.send(request_line)
        self.remote.send(str(self.headers) + "\r\n")
        # Send Post data
        if self.command == 'POST':
            self.remote.send(self.rfile.read(int(self.headers['Content-Length'])))

        response = HTTPResponse(self.remote, method=self.command)
        badStatusLine = False
        msg = "http405"
        try:
            response.begin()
            print(host + " response: %d" % (response.status))
            msg = "http%d" % (response.status)
        except BadStatusLine:
            print(host + " response: BadStatusLine")
            msg = "badStatusLine"
            badStatusLine = True

        if doInject and (badStatusLine or response.status in (400, 405)):
            # The server choked on the injected prefix: retry via the tunnel.
            self.remote.close()
            self.remote = None
            logging.info(host + " seem not support inject, " + msg)
            return self.do_METHOD_Tunnel()

        # Reply to the browser
        status = "HTTP/1.1 " + str(response.status) + " " + response.reason
        self.wfile.write(status + "\r\n")
        # Transfer-Encoding is dropped because response.read() hands back the
        # decoded body.
        h = "".join(
            hh + ': ' + vv + '\r\n'
            for hh, vv in response.getheaders()
            if hh.upper() != 'TRANSFER-ENCODING'
        )
        self.wfile.write(h + "\r\n")

        dataLength = 0
        while True:
            response_data = response.read(8192)
            if len(response_data) == 0:
                break
            if dataLength == 0 and len(response_data) <= 501:
                # A short first chunk holding a stock error page is treated as
                # "injection not supported": whitelist the host and serve the
                # configured reload page instead.
                if (response_data.find("<title>400 Bad Request") != -1
                        or response_data.find("<title>501 Method Not Implemented") != -1):
                    logging.error(host + " not supporting injection")
                    domainWhiteList.append(host)
                    response_data = gConfig["PAGE_RELOAD_HTML"]
            self.wfile.write(response_data)
            dataLength += len(response_data)
        logging.debug("data length: %d" % dataLength)
    except Exception:
        # Capture the failure before any clean-up call can disturb it.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        if self.remote:
            self.remote.close()
            self.remote = None

        status = "HTTP/1.1 302 Found"
        if host in gConfig["HSTS_ON_EXCEPTION_DOMAINS"]:
            redirectUrl = "https://" + self.path[7:]
            self.wfile.write(status + "\r\n")
            self.wfile.write("Location: " + redirectUrl + "\r\n")
            # NOTE(review): the header block is not terminated here and control
            # still reaches the tunnel fallback below -- confirm whether a
            # terminating blank line and a return were intended.

        code = None
        if exc_type == socket.error:
            # Take the numeric code from the exception itself.  Parsing the
            # "[Errno N] text" rendering breaks (ValueError inside this
            # handler) for socket errors that carry no errno.
            code = getattr(exc_value, "errno", None)
            if code in (32, 10053):  # errno.EPIPE, 10053 is for Windows
                logging.info("Detected remote disconnect: " + host)
                return
            if code == 61:  # server not support injection
                if doInject:
                    logging.info("try not inject " + host)
                    domainWhiteList.append(host)
                    self.do_METHOD_Tunnel()
                    return

        # Joined into one string so the output matches a two-argument print
        # statement (note the resulting double space).
        print(" ".join(("error in proxy: ", self.requestline)))
        print(exc_type)
        print(str(exc_value) + " " + host)

        # timed out, 10060 is for Windows; `code` stays None unless the
        # failure was a plain socket.error.
        if exc_type == socket.timeout or code in (60, 110, 10060):
            if not inWhileList:
                logging.info("add " + host + " to blocked domains")
                gConfig["BLOCKED_DOMAINS"][host] = True
        return self.do_METHOD_Tunnel()
def proxy(self):
    """Forward the current HTTP request upstream and relay the reply.

    The destination is taken from the ``Host`` header (default port 80) and
    resolved with ``self.getip``.  The upstream socket ``self.remote`` is
    reused while the ``Host`` header is unchanged; a new connection is
    prefixed with a bare blank line when ``self.enableInjection`` says so.
    If the server answers 400, the reply is drained and the request is sent
    once more on the same socket.  The final status line, the headers (minus
    ``Transfer-Encoding``) and the body are written to ``self.wfile``.

    On any exception the upstream socket is dropped, the error is printed,
    an error or redirect response is written to the client and the client
    connection is closed.
    """
    doInject = False
    try:
        print(self.requestline)
        self.supportCrLfPrefix = True

        # Split an optional ":port" suffix off the Host header.
        port = 80
        host = self.headers["Host"]
        if host.find(":") != -1:
            port = int(host.split(":")[1])
            host = host.split(":")[0]
        # Remove http://[host]
        path = self.path[self.path.find(host) + len(host):]
        connectHost = self.getip(host)
        doInject = self.enableInjection(host, connectHost)

        # Open a fresh upstream connection only when there is none yet or the
        # request targets a different Host than the previous one.
        if self.remote is None or self.lastHost != self.headers["Host"]:
            self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.remote.connect((connectHost, port))
            if doInject:
                self.remote.send("\r\n\r\n")
        self.lastHost = self.headers["Host"]

        while True:
            # Send requestline
            self.remote.send(" ".join((self.command, path, self.request_version)) + "\r\n")
            # Send headers
            self.remote.send(str(self.headers) + "\r\n")
            # Send Post data
            if self.command == "POST":
                self.remote.send(self.rfile.read(int(self.headers["Content-Length"])))
            response = HTTPResponse(self.remote, method=self.command)
            response.begin()
            if response.status == 400 and self.supportCrLfPrefix:
                # Drain the 400 body, then resend the request exactly once;
                # clearing the flag keeps this loop from repeating.
                while response.read(8192):
                    pass
                self.supportCrLfPrefix = False
                continue
            break

        # Reply to the browser
        status = "HTTP/1.1 " + str(response.status) + " " + response.reason
        self.wfile.write(status + "\r\n")
        # Transfer-Encoding is dropped because response.read() hands back the
        # decoded body.
        h = "".join(
            hh + ": " + vv + "\r\n"
            for hh, vv in response.getheaders()
            if hh.upper() != "TRANSFER-ENCODING"
        )
        self.wfile.write(h + "\r\n")
        while True:
            response_data = response.read(8192)
            if len(response_data) == 0:
                break
            self.wfile.write(response_data)
    except Exception:
        if self.remote:
            self.remote.close()
            self.remote = None
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # Joined into one string so the output matches a two-argument print
        # statement (note the resulting double space).
        print(" ".join(("error in proxy: ", self.requestline)))
        print(exc_type)
        print(exc_value)
        traceback.print_tb(exc_traceback)
        try:
            parsed_url = urlparse.urlparse(self.path)
            scm, netloc = parsed_url[0], parsed_url[1]
            # Every response below ends with an empty line: an HTTP header
            # section must be terminated by a blank line, otherwise the client
            # sees a truncated message when the connection closes.
            if scm.upper() != "HTTP":
                self.wfile.write("HTTP/1.1 500 Server Error " + scm.upper() + "\r\n\r\n")
            elif netloc == urlparse.urlparse(PROXY_SERVER)[1]:
                self.wfile.write("HTTP/1.1 500 Server Error, Cannot connect to proxy " + "\r\n\r\n")
            elif doInject:
                # Redirect to PROXY_SERVER; [7:] skips the first seven
                # characters of the request URL (the length of "http://").
                status = "HTTP/1.1 302 Found"
                self.wfile.write(status + "\r\n")
                self.wfile.write("Location: " + PROXY_SERVER + self.path[7:] + "\r\n\r\n")
            else:
                print("Not redirect " + self.path)
                self.wfile.write("HTTP/1.1 500 Server Error Unkown Error\r\n\r\n")
        finally:
            # Close the client connection even if writing the error failed.
            self.connection.close()