def EXTRACT_EMBEDDED(s, buff):
    """Scan ``buff`` for embedded files using hachoir magic-byte detection.

    Parameters:
        s: unused state/settings argument kept for caller compatibility.
        buff: raw bytes of the file to scan.

    Returns:
        dict mapping "Object_<n>" to an OrderedDict holding the subfile's
        Start/End offsets (as "<n> bytes" strings), the hachoir parser
        description, and the raw Buffer slice of ``buff``.
    """
    extract_files = {}
    stream = StringInputStream(buff)
    subfile = SearchSubfile(stream)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.stats = {}
    subfile.next_offset = None
    counter = 0
    last_end = 0
    while subfile.current_offset < subfile.size:
        subfile.datarate.update(subfile.current_offset)
        for offset, parser in subfile.findMagic(subfile.current_offset):
            # Don't care about extracting the base file (offset 0), just
            # what's within it.  False positives often report sizes equal
            # to or exceeding the containing file, or no content size at
            # all — weed those out.  The truthiness check comes FIRST so a
            # None content_size is never ordered against an int (TypeError
            # on Python 3).
            if (
                offset != 0
                and parser.content_size
                and parser.content_size != subfile.size
                and parser.content_size < subfile.size
            ):
                # hachoir reports offsets/sizes in bits; convert to bytes.
                start = offset // 8
                end = start + parser.content_size // 8
                # Don't pull subfiles out of a region we are already
                # extracting — nested content is handled when the module
                # is run again on the returned 'Buffer' value.
                if start >= last_end:
                    extract_files["Object_%s" % counter] = OrderedDict(
                        [
                            ("Start", "%s bytes" % start),
                            ("End", "%s bytes" % end),
                            ("Description", parser.description),
                            ("Buffer", buff[start:end]),
                        ]
                    )
                    counter += 1
                    last_end = end
        subfile.current_offset += subfile.slice_size
        if subfile.next_offset:
            subfile.current_offset = max(subfile.current_offset,
                                         subfile.next_offset)
        subfile.current_offset = min(subfile.current_offset, subfile.size)
    return extract_files
# NOTE(review): this is a second, byte-equivalent definition of
# EXTRACT_EMBEDDED; at import time it silently shadows the earlier one.
# Confirm which copy is intended and delete the other.
def EXTRACT_EMBEDDED(s, buff):
    """Scan ``buff`` for embedded files using hachoir magic-byte detection.

    Parameters:
        s: unused state/settings argument kept for caller compatibility.
        buff: raw bytes of the file to scan.

    Returns:
        dict mapping "Object_<n>" to an OrderedDict holding the subfile's
        Start/End offsets (as "<n> bytes" strings), the hachoir parser
        description, and the raw Buffer slice of ``buff``.
    """
    extract_files = {}
    stream = StringInputStream(buff)
    subfile = SearchSubfile(stream)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.stats = {}
    subfile.next_offset = None
    counter = 0
    last_end = 0
    while subfile.current_offset < subfile.size:
        subfile.datarate.update(subfile.current_offset)
        for offset, parser in subfile.findMagic(subfile.current_offset):
            # Skip the base file itself (offset 0) and weed out false
            # positives: parsers reporting no content size, or a size
            # equal to / exceeding the containing file.  Truthiness is
            # checked before the ordering comparison so a None
            # content_size cannot raise TypeError on Python 3.
            if (
                offset != 0
                and parser.content_size
                and parser.content_size != subfile.size
                and parser.content_size < subfile.size
            ):
                # hachoir offsets/sizes are in bits; convert to bytes.
                start = offset // 8
                end = start + parser.content_size // 8
                # Avoid pulling subfiles out of a region already being
                # extracted; nested content is handled when the module is
                # run again on the returned 'Buffer' value.
                if start >= last_end:
                    extract_files["Object_%s" % counter] = OrderedDict(
                        [
                            ("Start", "%s bytes" % start),
                            ("End", "%s bytes" % end),
                            ("Description", parser.description),
                            ("Buffer", buff[start:end]),
                        ]
                    )
                    counter += 1
                    last_end = end
        subfile.current_offset += subfile.slice_size
        if subfile.next_offset:
            subfile.current_offset = max(subfile.current_offset,
                                         subfile.next_offset)
        subfile.current_offset = min(subfile.current_offset, subfile.size)
    return extract_files
def file_subfiles(filename):
    """Locate embedded subfiles inside ``filename`` via hachoir magic scan.

    Parameters:
        filename: path of the file to scan.

    Returns:
        list of dicts, one per embedded subfile, each with byte "offset",
        "size", "mimetype", parser "description", and a "hashes" dict
        (md5/sha256/ssdeep).  Returns an empty list when ``filename`` is
        falsy or a MemoryError aborts the scan (previously those paths hit
        a NameError because ``subfiles`` was defined inside the try).
    """
    subfiles = []
    if not filename:
        return subfiles
    offset, size = 0, 999999
    # Subfiles reported larger than this cap are treated as likely false
    # positives: they get the parser class name instead of a description.
    filemaxsize = 100 * 1024 * 1024
    stream = FileInputStream(unicodeFilename(filename), real_filename=filename)
    subfile = SearchSubfile(stream, offset, size)
    # Read the file once up front; the original re-opened and re-read the
    # whole file for every magic hit inside the inner loop.
    with open(filename, "rb") as fo:
        filedata = fo.read()
    try:
        subfile.loadParsers()
        subfile.stats = {}
        subfile.verbose = False
        subfile.next_offset = None
        while subfile.current_offset < subfile.size:
            subfile.datarate.update(subfile.current_offset)
            for offset, parser in subfile.findMagic(subfile.current_offset):
                try:
                    # hachoir sizes are in bits; convert to bytes.
                    size = parser.content_size // 8 if parser.content_size else None
                except Exception:
                    size = None
                try:
                    if parser.content_size and parser.content_size // 8 >= filemaxsize:
                        description = parser.__class__.__name__
                    else:
                        description = parser.description
                except Exception:
                    description = None
                offset = offset // 8
                # Skip the first hit: it is the original file itself.
                if offset == 0:
                    continue
                # A single truthiness guard on ``size`` replaces the
                # original's mixed guards ("size > 0" raised TypeError when
                # size was None; "(offset or offset == 0)" was a tautology).
                if size:
                    data = filedata[offset:offset + size]
                    mimetype = data_mimetype(data)
                    md5 = data_hashes(data, "md5")
                    sha256 = data_hashes(data, "sha256")
                    ssdeep = data_hashes(data, "ssdeep")
                else:
                    mimetype = md5 = sha256 = ssdeep = None
                subfiles.append({
                    "offset": offset,
                    "size": size,
                    "mimetype": mimetype,
                    "description": description,
                    "hashes": {
                        "md5": md5,
                        "sha256": sha256,
                        "ssdeep": ssdeep,
                    },
                })
            subfile.current_offset += subfile.slice_size
            if subfile.next_offset:
                subfile.current_offset = max(subfile.current_offset,
                                             subfile.next_offset)
            subfile.current_offset = min(subfile.current_offset, subfile.size)
    except MemoryError:
        error("[!] Memory error!")
    return subfiles