def subfile(self, filePath):
    # hachoir-subfile is a tool based on hachoir-parser to find subfiles in any binary stream.
    # Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-subfile

    # bypass sys.stdout, sys.stderr
    oldStdOut = sys.stdout
    oldStdErr = sys.stderr
    outputStdErr = StringIO.StringIO()
    outputStdOut = StringIO.StringIO()
    sys.stdout = outputStdOut
    sys.stderr = outputStdErr

    stream = FileInputStream(unicodeFilename(filePath), real_filename=filePath)

    # Search for subfiles
    subfile = SearchSubfile(stream, 0, None)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.main()

    # reset sys.stdout, sys.stderr
    sys.stdout = oldStdOut
    sys.stderr = oldStdErr

    # parse stdout, stderr from SearchSubfile
    return self.parse(outputStdOut.getvalue(), outputStdErr.getvalue())
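# A minimal standalone sketch of the pattern the method above wraps: run
# hachoir-subfile's SearchSubfile over a file and carve every detected subfile
# into an output directory. Only calls already used in these snippets
# (FileInputStream, unicodeFilename, SearchSubfile, loadParsers, setOutput,
# main) appear here; the import paths are an assumption based on the legacy
# Python 2 hachoir packages (hachoir_core / hachoir_subfile) and differ in
# hachoir 3, where everything lives under the single "hachoir" package.
from hachoir_core.stream import FileInputStream
from hachoir_core.cmd_line import unicodeFilename
from hachoir_subfile.search import SearchSubfile


def carve_subfiles(file_path, output_dir):
    # Open the container file as a hachoir input stream
    stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
    # Scan from offset 0 with no size limit, using every registered parser
    searcher = SearchSubfile(stream, 0, None)
    searcher.loadParsers(categories=None, parser_ids=None)
    # Write each discovered subfile to output_dir instead of only reporting it
    searcher.setOutput(output_dir)
    # main() runs the scan; the snippets above treat its return value as a success flag
    return searcher.main()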
def save_response_binaries(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # create the orig file, ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            if not os.path.exists(output):
                os.mkdir(output)
            subfile.setOutput(output)
            ok = subfile.main()
            # save the files info to the db as well
        return True
    except Exception, ex:
        return False
def convert_gzip_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # create the orig file, ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode, convert it to a regular string for the hachoir operation
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)
            http_details = filter(lambda x: x.flow_details.id == detail.id,
                                  HTTPDetails.objects.filter(http_type="response"))
            file_ext = ".txt"
            for http in http_details:
                if http.content_type:
                    filename = subfile.output.createFilename(file_ext)
                    if http.content_encoding == "gzip":
                        r = open("/".join([output, filename]), "r")
                        body = r.read()
                        r.close()
                        data = StringIO.StringIO(body)
                        gzipper = gzip.GzipFile(fileobj=data)
                        html = gzipper.read()
                        filename = filename.split(".")[0] + ".html"
                        w = open("/".join([output, filename]), "w")
                        w.write(html)
                        w.close()
        return True
    except Exception, ex:
        print ex
        return False
def EXTRACT_EMBEDDED(s, buff):
    EXTRACT_FILES = {}
    CHILD_BUFF = {}

    stream = StringInputStream(buff)
    subfile = SearchSubfile(stream)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.stats = {}
    subfile.next_offset = None
    counter = 0
    last_start = 0
    last_end = 0

    while subfile.current_offset < subfile.size:
        subfile.datarate.update(subfile.current_offset)
        for offset, parser in subfile.findMagic(subfile.current_offset):
            # Don't care about extracting the base file, just what's within it.
            # False positives often return sizes exceeding the size of the file;
            # they may also lack a content size entirely, so weed them out.
            if offset != 0 and parser.content_size != subfile.size \
                    and parser.content_size < subfile.size and parser.content_size:
                start = offset // 8
                end = start + parser.content_size // 8
                # Make sure we aren't pulling subfiles out of ones we are already
                # extracting; that happens later anyway when the module is run
                # again on the returned 'Buffer' value.
                if start >= last_end:
                    EXTRACT_FILES['Object_%s' % counter] = OrderedDict([
                        ('Start', '%s bytes' % start),
                        ('End', '%s bytes' % end),
                        ('Description', parser.description),
                        ('Buffer', buff[start:end]),
                    ])
                    counter += 1
                    last_start = start
                    last_end = end
        subfile.current_offset += subfile.slice_size
        if subfile.next_offset:
            subfile.current_offset = max(subfile.current_offset, subfile.next_offset)
        subfile.current_offset = min(subfile.current_offset, subfile.size)

    return EXTRACT_FILES
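# A hypothetical invocation of EXTRACT_EMBEDDED as defined above: read a raw
# buffer and walk the returned dictionary. The first argument is not used by
# the function body, so None is passed here; "suspect.bin" is a placeholder
# path, and every key accessed below mirrors the OrderedDict built above.
# Note that findMagic() reports offsets and sizes in bits, which is why the
# function divides by 8 before slicing the byte buffer.
with open("suspect.bin", "rb") as fh:
    buff = fh.read()

for name, obj in EXTRACT_EMBEDDED(None, buff).items():
    print("%s: %s (%s -> %s)" % (name, obj['Description'], obj['Start'], obj['End']))
    # obj['Buffer'] holds the carved bytes; re-running the module on that
    # buffer finds subfiles nested inside this object.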
def save_response_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # create the orig file, ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode, convert it to a regular string for the hachoir operation
            file_path = str(file_path)

            strings = ["Content-Type: text/html",
                       "Content-Type: application/x-javascript",
                       "Content-Type: text/css"]
            file_handler = FileHandler()
            responses = []
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                responses.append(item[0])

            empty_lines = []
            strings = ["\r\n\r\n"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                empty_lines.append(item[0])

            http_lines = []
            strings = ["HTTP/1.1"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                http_lines.append(item[0])

            try:
                stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)

            for x in range(len(responses)):
                # here I have the request header
                data = file_handler.data
                #f = data[empty_lines[x]:http_lines[x+1]]
                file_ext = ".txt"
                #if ("html" in f or "body" in f):
                #    file_ext = ".html"
                #elif ("script" in f):
                #    file_ext = ".js"
                #else:
                # select the closest empty line
                empty_lines.append(responses[x])
                empty_lines.sort()
                index = empty_lines.index(responses[x])
                offset = empty_lines[index + 1]
                size = None
                try:
                    size = http_lines[x + 1] - 2
                except IndexError:
                    size = stream.size
                f = data[offset + 4:size]
                filename = subfile.output.createFilename(file_ext)
                w = open("/".join([output, filename]), "w")
                w.write(f)
                w.close()

            # saving the hachoir-saved binaries to the db along with the created txt files
            if detail.protocol == "http":
                http_files = os.listdir(output)
                #http_files = filter(lambda x: x.split(".")[-1] != 'txt', http_files) # no need to take the txt files
                if len(http_files) > 0:
                    http_li = filter(lambda x: x.flow_details.id == detail.id, HTTPDetails.objects.all())
                    for http in http_li:
                        http.files = http_files
                        http.save()
        return True
    except Exception, ex:
        print ex
        return False
def file_subfiles(filename):
    if filename and filename != "":
        offset, size, memorylimit, filemaxsize = 0, 999999, 50 * 1024 * 1024, 100 * 1024 * 1024
        stream = FileInputStream(unicodeFilename(filename), real_filename=filename)
        subfile = SearchSubfile(stream, offset, size)
        try:
            subfile.loadParsers()
            subfile.stats = {}
            subfile.verbose = False
            subfile.next_offset = None
            subfiles = []
            while subfile.current_offset < subfile.size:
                _ = subfile.datarate.update(subfile.current_offset)
                for offset, parser in subfile.findMagic(subfile.current_offset):
                    try:
                        size = parser.content_size // 8 if parser.content_size else None
                    except Exception as ex:
                        size = None
                    try:
                        description = parser.description if not (parser.content_size) or \
                            parser.content_size // 8 < filemaxsize else parser.__class__.__name__
                    except Exception as ex:
                        description = None
                    offset = offset // 8
                    # skip the first subfile
                    # as it's the original file itself
                    if offset == 0:
                        continue
                    with open(filename, "rb") as fo:
                        filedata = fo.read()
                    mimetype = data_mimetype(filedata[offset:offset + size]) \
                        if offset > 0 and size and size > 0 else None
                    md5 = data_hashes(filedata[offset:offset + size], "md5") \
                        if offset >= 0 and size > 0 else None
                    sha256 = data_hashes(filedata[offset:offset + size], "sha256") \
                        if (offset or offset == 0) and size else None
                    ssdeep = data_hashes(filedata[offset:offset + size], "ssdeep") \
                        if (offset or offset == 0) and size else None
                    subfiles.append({
                        "offset": offset,
                        "size": size,
                        "mimetype": mimetype,
                        "description": description,
                        "hashes": {
                            "md5": md5,
                            "sha256": sha256,
                            "ssdeep": ssdeep,
                        },
                    })
                subfile.current_offset += subfile.slice_size
                if subfile.next_offset:
                    subfile.current_offset = max(subfile.current_offset, subfile.next_offset)
                subfile.current_offset = min(subfile.current_offset, subfile.size)
        except MemoryError:
            error("[!] Memory error!")
        return subfiles
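# A hypothetical call to file_subfiles() as defined above, printing one line per
# carved object. "sample.exe" is a placeholder path, and a non-empty filename is
# assumed (the function returns None when given an empty one). Every key accessed
# below matches the dicts the function appends to its result list.
for entry in file_subfiles("sample.exe"):
    print("offset=%d size=%s type=%s md5=%s" % (
        entry["offset"], entry["size"], entry["mimetype"], entry["hashes"]["md5"]))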