# Module-level imports required by the methods below (Python 2; the hachoir
# module paths assume the hachoir-core / hachoir-subfile 1.x packages).
# Flow and HTTPDetails are this project's Django models and FileHandler its
# file-search helper; they are imported from elsewhere in the project.
import os
import gzip
import StringIO

from hachoir_core.cmd_line import unicodeFilename
from hachoir_core.stream import FileInputStream, NullStreamError
from hachoir_subfile.search import SearchSubfile


def save_response_binaries(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # build the response-dump filename, e.g.
            # contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode; convert it to a regular string
            # for the hachoir operation
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path),
                                         real_filename=file_path)
            except NullStreamError:
                continue
            # carve any embedded files hachoir can recognize out of the
            # raw response dump
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            if not os.path.exists(output):
                os.mkdir(output)
            subfile.setOutput(output)
            ok = subfile.main()
            # TODO: save the extracted files' info to the db as well
        return True
    except Exception, ex:
        return False
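# Usage sketch (hypothetical caller; the class name "FlowHandler" below is
# assumed for illustration -- these methods belong to whatever handler class
# defines them in this project):
#
#   handler = FlowHandler()
#   ok = handler.save_response_binaries("/data/flows/abc123", "abc123")
#   # on success, carved payloads land under
#   # /data/flows/abc123/html-files/<src:port-dst:port>/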
def convert_gzip_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # build the response-dump filename, e.g.
            # contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode; convert it to a regular string
            # for the hachoir operation
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path),
                                         real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)
            # pick the HTTP responses that belong to this flow detail
            http_details = filter(lambda x: x.flow_details.id == detail.id,
                                  HTTPDetails.objects.filter(http_type="response"))
            file_ext = ".txt"
            for http in http_details:
                if http.content_type:
                    filename = subfile.output.createFilename(file_ext)
                    if http.content_encoding == "gzip":
                        # inflate the gzip-encoded body in memory and write it
                        # back out as .html (binary-safe file modes)
                        r = open("/".join([output, filename]), "rb")
                        body = r.read()
                        r.close()
                        data = StringIO.StringIO(body)
                        gzipper = gzip.GzipFile(fileobj=data)
                        html = gzipper.read()
                        filename = filename.split(".")[0] + ".html"
                        w = open("/".join([output, filename]), "wb")
                        w.write(html)
                        w.close()
        return True
    except Exception, ex:
        print ex
        return False
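# Minimal sketch of the decompression step above, isolated for clarity
# (Python 2 stdlib only; gunzip_body is a hypothetical helper, not part of
# this class):
#
#   def gunzip_body(body):
#       # inflate a gzip-encoded HTTP response body held in memory
#       return gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()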
def save_response_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # build the response-dump filename, e.g.
            # contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode; convert it to a regular string
            # for the hachoir operation
            file_path = str(file_path)
            # offsets of the Content-Type headers of the responses we keep
            strings = ["Content-Type: text/html",
                       "Content-Type: application/x-javascript",
                       "Content-Type: text/css"]
            file_handler = FileHandler()
            responses = []
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                responses.append(item[0])
            # offsets of the blank lines that terminate each header block
            empty_lines = []
            strings = ["\r\n\r\n"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                empty_lines.append(item[0])
            # offsets of the response status lines
            http_lines = []
            strings = ["HTTP/1.1"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                http_lines.append(item[0])
            try:
                stream = FileInputStream(unicodeFilename(file_path),
                                         real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)
            for x in range(len(responses)):
                data = file_handler.data
                file_ext = ".txt"
                # find the blank line closest after this response's headers:
                # insert the header offset, sort, and take its successor
                empty_lines.append(responses[x])
                empty_lines.sort()
                index = empty_lines.index(responses[x])
                offset = empty_lines[index + 1]
                try:
                    # the body ends 2 bytes (the trailing "\r\n") before the
                    # next status line
                    size = http_lines[x + 1] - 2
                except IndexError:
                    # last response: read through the end of the dump
                    # (stream.size is in bits, but the over-long upper bound
                    # is harmlessly clamped by the slice)
                    size = stream.size
                f = data[offset + 4:size]
                filename = subfile.output.createFilename(file_ext)
                w = open("/".join([output, filename]), "wb")
                w.write(f)
                w.close()
            # record the hachoir-carved binaries together with the .txt files
            # created above in the db
            if detail.protocol == "http":
                http_files = os.listdir(output)
                if len(http_files) > 0:
                    http_li = filter(lambda x: x.flow_details.id == detail.id,
                                     HTTPDetails.objects.all())
                    for http in http_li:
                        http.files = http_files
                        http.save()
        return True
    except Exception, ex:
        print ex
        return False
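# Sketch of the offset arithmetic used above: a body starts len("\r\n\r\n") == 4
# bytes past the blank line that closes its headers, and ends 2 bytes (the
# trailing "\r\n") before the next "HTTP/1.1" status line:
#
#   raw = "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n<html/>\r\nHTTP/1.1 404 Not Found\r\n"
#   start = raw.index("\r\n\r\n") + 4
#   end = raw.index("HTTP/1.1", start) - 2
#   assert raw[start:end] == "<html/>"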