def unpack(filepath, contents=None, password=None, filename=None, duplicates=None):
    """Unpacks the file or contents provided."""
    f = (
        File(filepath, contents, filename=filename)
        if contents else
        File.from_path(filepath, filename=filename)
    )
    duplicates = [] if duplicates is None else duplicates
    # The file may not have a usable extension (e.g., when its filename is
    # a hash), so the unpacker is guessed from the file itself as well.
    f.unpacker = Unpacker.guess(f)
    # Unpack any embedded files if a plugin supports this archive type.
    if f.unpacker:
        plugin = plugins[f.unpacker](f)
        if plugin.supported():
            f.children = plugin.unpack(password, duplicates)
    return f
def test_stream():
    """File exposes filesize, sha256 and a readable stream regardless of
    whether it was built from contents, a BytesIO, or an open file."""
    f = File(contents=b"foo1")
    assert f.filesize == 4
    assert f.stream.read() == b"foo1"

    f = File(stream=io.BytesIO(b"foo2"))
    assert f.filesize == 4
    assert f.stream.read() == b"foo2"

    fd, filepath = tempfile.mkstemp()
    os.write(fd, b"foobar")
    os.close(fd)
    try:
        f = File(stream=open(filepath, "rb"))
        assert f.filesize == 6
        assert f.stream.read() == b"foobar"
        assert f.sha256.startswith("c3ab8ff13720e8ad9047")
    finally:
        # The original test leaked this temporary file; clean it up.
        os.unlink(filepath)

    # Each fresh access of .stream apparently rewinds to the start: a full
    # read followed by a new access still yields the leading bytes.
    f = File(stream=io.BytesIO(b"hello world"))
    assert f.stream.read() == b"hello world"
    assert f.stream.read(5) == b"hello"

    # A stream handle kept across reads continues from the cursor.
    f = File(stream=io.BytesIO(b"hello world"))
    s = f.stream
    assert s.read(6) == b"hello "
    assert s.read() == b"world"
    assert f.sha256.startswith("b94d27b9934d3e08a52e52d7da7da")
def unpack(filepath=None, contents=None, password=None, filename=None, duplicates=None):
    """Unpacks the file or contents provided."""
    duplicates = duplicates if duplicates is not None else []
    if six.PY3:
        # On Python 3, callers must provide bytes rather than text.
        for value in (filepath, contents, filename):
            if isinstance(value, str):
                raise IncorrectUsageException
    if contents:
        f = File(filepath, contents, filename=filename)
    else:
        f = File.from_path(filepath, filename=filename)
    Unpacker.single(f, password, duplicates)
    ident(f)
    return f
def test_extract5_relative_spf():
    """7z's -spf flag enables an unsafe mode in which it will try to write
    files to relative paths; Zipjail must catch the directory traversal
    attempt and cancel extraction."""
    buf = io.BytesIO()
    z = zipfile.ZipFile(buf, "w")
    z.writestr("foobarfilename", "A" * 1024)
    z.writestr("thisisfilename", "B" * 1024)
    z.close()
    # Rewrite one entry name so it points outside the target directory.
    contents = buf.getvalue().replace(b"thisisfilename", b"/../../../rela")

    f = File(None, contents, filename=None)
    filepath = f.temp_path(b".7z")
    dirpath = tempfile.mkdtemp()

    u = Zip7File(f)
    u.name = "7zfile"
    args = ["-spf"]
    with pytest.raises(UnpackException) as e:
        u.zipjail(
            filepath, dirpath, "x", "-mmt=off", "-o%s" % dirpath, filepath,
            *args
        )
    assert e.value.state == Errors.CANCELLED_DIR_TRAVERSAL

    # Only the benign entry may have been extracted, with intact contents.
    assert len(os.listdir(dirpath)) == 1
    filepath = os.path.join(dirpath, "foobarfilename")
    # Use a context manager; the original open(...).read() leaked the
    # file handle.
    with open(filepath, "rb") as fp:
        assert fp.read() == b"A" * 1024
def locate_ole(self, contents):
    """Locate a zlib-compressed OLE object within the first 1024 bytes of
    the given contents and return the parsed OLE structure.

    Raises UnpackException when no compressed stream is found, and lets
    File.raise_no_ole() signal when the decompressed data is not OLE.
    """
    for idx in range(1024):
        try:
            obj = zlib.decompress(contents[idx:])
            break
        except zlib.error:
            # Not a valid zlib stream at this offset; keep scanning.
            # (The original bare `except:` also swallowed unrelated
            # errors such as MemoryError and KeyboardInterrupt.)
            pass
    else:
        # NOTE(review): the data is a raw zlib/deflate stream, although
        # the message says GZIP; text kept as-is since callers may
        # match on it.
        raise UnpackException("GZIP stream not found")
    f = File(contents=obj)
    f.raise_no_ole("No OLE file found in MSO")
    return f.ole
def unpack(self, password=None, duplicates=None):
    """Extract files embedded in a PDF through /EF (embedded file) entries.

    Parses the document with peepdf and walks every object of every
    incremental revision, collecting objects that carry both a /F
    (filename) string and an /EF dictionary referencing the payload.
    """
    entries = []
    # peepdf parses from disk, so dump the contents to a temporary file
    # when this File was not loaded from a path.
    if self.f.filepath:
        filepath = self.f.filepath
        temporary = False
    else:
        filepath = self.f.temp_path()
        temporary = True
    p = peepdf.PDFCore.PDFParser()
    _, f = p.parse(
        filepath, forceMode=True, looseMode=True, manualAnalysis=False
    )
    # Each incremental update of a PDF has its own object table.
    for version in range(f.updates + 1):
        for obj in f.body[version].objects.values():
            if not isinstance(obj.object, peepdf.PDFCore.PDFDictionary):
                continue
            if "/F" not in obj.object.elements:
                continue
            if "/EF" not in obj.object.elements:
                continue
            filename = obj.object.elements["/F"]
            if not isinstance(filename, peepdf.PDFCore.PDFString):
                continue
            ref = obj.object.elements["/EF"]
            if not isinstance(ref, peepdf.PDFCore.PDFDictionary):
                continue
            if "/F" not in ref.elements:
                continue
            ref = ref.elements["/F"]
            if not isinstance(ref, peepdf.PDFCore.PDFReference):
                continue
            # The /EF./F reference must resolve to an object within this
            # revision; its decoded stream holds the embedded payload.
            if ref.id not in f.body[version].objects:
                continue
            obj = f.body[version].objects[ref.id]
            contents = obj.object.decodedStream
            filename = filename.value
            if six.PY3:
                # peepdf returns text; convert back into raw bytes.
                contents = contents.encode("latin-1")
                filename = filename.encode()
            entries.append(
                File(contents=contents, filename=filename, selected=False))
    if temporary:
        os.unlink(filepath)
    if entries:
        # NOTE(review): presumably disables previewing the container PDF
        # once embedded files were found — confirm against File.preview.
        self.f.preview = False
    return self.process(entries, duplicates)
def decrypt(self, password, archive, entry):
    """Attempt to read a single zip entry with the given password.

    Returns a File on success, None when the error message indicates a
    bad password or corrupt data, and raises UnpackException for any
    unrecognized zipfile error.
    """
    try:
        archive.setpassword(password)
        return File(
            relapath=entry.filename,
            contents=archive.read(entry),
            password=password
        )
    except (RuntimeError, zipfile.BadZipfile, OverflowError, zlib.error) as e:
        # Python 3 exceptions have no .message attribute; the original
        # `e.message` raised AttributeError before the real error could
        # be inspected. Fall back to args[0] like the sibling decrypt().
        msg = getattr(e, "message", None) or e.args[0]
        if "Bad password" in msg:
            return
        if "Bad CRC-32" in msg:
            return
        if "password required" in msg:
            return
        if "Truncated file header" in msg:
            return
        if "invalid distance too far back" in msg:
            return
        if "cannot fit 'long' into" in msg:
            return
        raise UnpackException("Unknown zipfile error: %s" % e)
def decrypt(self, password, archive, entry):
    """Try to extract a zip entry using the given password, returning a
    File on success and None for any recognized decryption failure."""
    recognized = (
        "Bad password",
        "Bad CRC-32",
        "password required",
        "Truncated file header",
        "invalid distance too far back",
        "cannot fit 'long' into",
        "Bad magic number for",
    )
    try:
        archive.setpassword(password)
        if six.PY3 and isinstance(entry.filename, str):
            entry.filename = entry.filename.encode()
        return File(
            relapath=entry.filename, contents=archive.read(entry),
            password=password
        )
    except (RuntimeError, zipfile.BadZipfile, OverflowError, zlib.error) as e:
        msg = getattr(e, "message", None) or e.args[0]
        # Known failure modes are silently ignored (caller tries other
        # passwords); anything else is a genuine error.
        if any(marker in msg for marker in recognized):
            return
        raise UnpackException("Unknown zipfile error: %s" % e)
def unpack(self, depth=0, password=None, duplicates=None):
    """Extract all regular-file members of a tar/tar.gz archive, bailing
    out when the cumulative unpacked size grows too large."""
    self.f.archive = True
    try:
        archive = tarfile.open(mode=self.mode, fileobj=self.f.stream)
    except tarfile.ReadError as e:
        # .gzip archives are also recognized as tar.gz files, so signal
        # "not supported" and let the unpack caller try the next
        # supporting plugin rather than marking the archive invalid.
        raise NotSupportedError(f"Invalid tar/tar.gz archive: {e}")

    entries = []
    total_size = 0
    for member in archive:
        # Skip anything that is not a file, and bogus negative sizes.
        if not member.isfile() or member.size < 0:
            continue
        # TODO Improve this. Also take precedence for native decompression
        # utilities over the Python implementation in the future.
        total_size += member.size
        if total_size >= MAX_TOTAL_SIZE:
            self.f.set_error(
                Errors.TOTAL_TOO_LARGE,
                f"Unpacked archive size exceeds: {MAX_TOTAL_SIZE}"
            )
            return []
        entries.append(File(
            relapath=member.path,
            contents=archive.extractfile(member).read()
        ))
    return self.process(entries, duplicates, depth)
def demux_office(filename, password):
    """Decrypt a password-protected MS Office document and return a list
    of file paths to analyze: the decrypted copy when decryption
    succeeded, otherwise the original file.

    Raises CuckooDemuxError when sflock's decryptor is unavailable.
    """
    retlist = []
    options = Config()
    # NOTE(review): aux_options is unused but kept, in case Config() has
    # load-time side effects — confirm and remove if not.
    aux_options = Config("auxiliary")
    tmp_path = options.cuckoo.get("tmppath", "/tmp")
    basename = os.path.basename(filename)
    target_path = os.path.join(tmp_path, "cuckoo-tmp/msoffice-crypt-tmp")
    if not os.path.exists(target_path):
        # makedirs also creates the intermediate "cuckoo-tmp" directory;
        # the original os.mkdir failed when the parent did not exist.
        os.makedirs(target_path)
    decrypted_name = os.path.join(target_path, basename)

    if HAS_SFLOCK:
        ofile = OfficeFile(sfFile.from_path(filename))
        d = ofile.decrypt(password)
        # d.contents is bytes, so open in binary mode; the original
        # text-mode "w" raises TypeError on Python 3.
        with open(decrypted_name, "wb") as outs:
            outs.write(d.contents)
        # TODO add decryption verification checks
        if "Encrypted" not in d.magic:
            retlist.append(decrypted_name)
    else:
        raise CuckooDemuxError("MS Office decryptor not available")

    if not retlist:
        retlist.append(filename)
    return retlist
def unpack(self, password=None, duplicates=None):
    """Extract the embedded files of an OLE container whose file metadata
    is kept in an INI-formatted, encrypted "Details" stream (Python 2
    variant using the old ConfigParser API)."""
    entries = []
    if not self.f.ole:
        self.f.mode = "failed"
        self.f.error = "No OLE structure found"
        return []
    # Without a "Details" stream there is nothing we know how to unpack.
    if ["Details"] not in self.f.ole.listdir():
        return []
    details = self.decrypt(self.f.ole.openstream("Details").read())
    config = ConfigParser.ConfigParser()
    config.readfp(io.BytesIO(details))
    ole = self.f.ole
    for filename in ole.listdir():
        # Skip the metadata stream itself and empty streams.
        if filename[0] == "Details" or not ole.get_size(filename[0]):
            continue
        # The "OriginalName" key maps each stream to its real filename.
        entries.append(
            File(relapath=ntpath.basename(
                config.get(filename[0], "OriginalName")),
                contents=self.decrypt(ole.openstream(
                    filename[0]).read())))
    return self.process(entries, duplicates)
def unpack(self, password=None, duplicates=None):
    """Extract e-mail attachments as embedded files (Python 2 variant,
    uses the unicode() builtin)."""
    entries = []
    e = email.message_from_string(self.f.contents)
    for part in e.walk():
        # Multipart containers only wrap other parts; skip them.
        if part.is_multipart():
            continue
        # Nameless parts with a whitelisted content type are message
        # bodies rather than attachments.
        if not part.get_filename() and \
                part.get_content_type() in self.whitelisted_content_type:
            continue
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        filename = part.get_filename()
        if filename:
            # Decode RFC 2047 encoded-words into a unicode filename.
            filename = unicode(
                email.header.make_header(
                    email.header.decode_header(filename)))
        entries.append(File(relapath=filename or "att1", contents=payload))
    return self.process(entries, duplicates)
def unpack(self, depth=0, password=None, duplicates=None):
    """Extract the embedded files of an OLE container whose file metadata
    is kept in an INI-formatted, encrypted "Details" stream."""
    ole = self.f.ole
    if not ole:
        self.f.set_error(Errors.UNPACK_FAILED, "No OLE structure found")
        return []
    # Without a "Details" stream there is nothing we know how to unpack.
    if ["Details"] not in ole.listdir():
        return []

    details = self.decrypt(bytearray(ole.openstream("Details").read()))
    config = configparser.ConfigParser()
    config.read_string(details.decode())

    entries = []
    for name in ole.listdir():
        stream = name[0]
        # Skip the metadata stream itself and empty streams.
        if stream == "Details" or not ole.get_size(stream):
            continue
        # "OriginalName" maps each stream to its real filename.
        original = ntpath.basename(config.get(stream, "OriginalName"))
        contents = self.decrypt(bytearray(ole.openstream(stream).read()))
        entries.append(File(relapath=original, contents=contents))
    return self.process(entries, duplicates, depth)
def real_unpack(self, password, duplicates):
    """Collect every non-multipart attachment of the e-mail as a list of
    File objects (bytes filenames, 'att1' fallback)."""
    entries = []
    message = email.message_from_string(self.f.contents.decode("latin-1"))
    for part in message.walk():
        # Multipart containers only wrap other parts; skip them.
        if part.is_multipart():
            continue
        # Nameless parts with a whitelisted content type are message
        # bodies rather than attachments.
        if not part.get_filename() and \
                part.get_content_type() in self.whitelisted_content_type:
            continue
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        filename = part.get_filename()
        if filename:
            # Decode RFC 2047 encoded-words, then back to raw bytes.
            decoded = email.header.make_header(
                email.header.decode_header(filename))
            filename = str(decoded).encode()
        entries.append(
            File(relapath=filename or b"att1", contents=payload))
    return entries
def unpack(self, password=None, duplicates=None):
    """Extract all regular files from a tar archive, marking the file as
    failed when it cannot be opened or grows too large unpacked."""
    try:
        archive = tarfile.open(mode=self.mode, fileobj=self.f.stream)
    except tarfile.ReadError as e:
        self.f.mode = "failed"
        self.f.error = e
        return []

    entries = []
    total_size = 0
    for member in archive:
        # Only regular files with a sane size are of interest.
        if not member.isfile() or member.size < 0:
            continue
        # TODO Improve this. Also take precedence for native decompression
        # utilities over the Python implementation in the future.
        total_size += member.size
        if total_size >= MAX_TOTAL_SIZE:
            self.f.error = "files_too_large"
            return []
        entries.append(File(
            relapath=member.path.encode(),
            contents=archive.extractfile(member).read()
        ))
    return self.process(entries, duplicates)
def unpack(self, password=None, duplicates=None):
    """Extract the embedded files of an OLE container, reading filenames
    from the encrypted INI-formatted "Details" stream (Python 2 variant
    that opens the OLE structure itself with olefile)."""
    entries = []
    try:
        ole = olefile.OleFileIO(io.BytesIO(self.f.contents))
    except IOError as e:
        self.f.mode = "failed"
        self.f.error = e
        return []
    details = self.decrypt(ole.openstream("Details").read())
    config = ConfigParser.ConfigParser()
    config.readfp(io.BytesIO(details))
    for filename in ole.listdir():
        # Skip the metadata stream itself and empty streams.
        if filename[0] == "Details" or not ole.get_size(filename[0]):
            continue
        # The "OriginalName" key maps each stream to its real filename.
        entries.append(
            File(relapath=ntpath.basename(
                config.get(filename[0], "OriginalName")),
                contents=self.decrypt(ole.openstream(
                    filename[0]).read())))
    return self.process(entries, duplicates)
def _decrypt(self, archive, entry, password):
    """Decrypt one archive entry, falling back to bruteforcing the
    password when the provided one does not work."""
    try:
        archive.setpassword(password)
        return File(entry.filename, archive.read(entry), password=password)
    except RuntimeError as e:
        message = e.args[0]
        if "password required" not in message and \
                "Bad password" not in message:
            raise UnpackException("Unknown zipfile error: %s" % e)
    # Bruteforce the password. First try all passwords that are known to
    # work and if that fails try our entire dictionary.
    return (
        self._bruteforce(archive, entry, self.known_passwords) or
        self._bruteforce(archive, entry, iter_passwords()) or
        File(entry.filename, None, mode="failed",
             description="Error decrypting file")
    )
def test_zipify4():
    """zipify() round-trips a tar archive without losing entries."""
    original = unpack("tests/files/tar_plain2.tar")
    rezipped = unpack(File(contents=zipify(original)).temp_path())
    assert len(original.children) == len(rezipped.children)
    for idx in (0, 1):
        assert original.children[idx].relapath == \
            rezipped.children[idx].relapath
        assert original.children[idx].contents == \
            rezipped.children[idx].contents
def decrypt_blob(self, f):
    """Decrypt a blob in 0x1000-byte AES-CBC chunks, deriving a fresh IV
    per chunk from the salt and chunk offset, and return the plaintext
    wrapped in a File."""
    # TODO Ensure that the assumption of "total size" being a 64-bit
    # integer is correct?
    total_size = struct.unpack("Q", f.read(8))[0]
    chunks = []
    for offset in range(0, total_size, 0x1000):
        iv = self.get_hash(
            self.ei.key_data_salt + struct.pack("<I", offset),
            self.ei.key_data_hash_alg
        )
        decryptor = Cipher(
            algorithms.AES(self.secret_key), modes.CBC(iv[:16]),
            backend=default_backend()
        ).decryptor()
        chunks.append(decryptor.update(f.read(0x1000)) + decryptor.finalize())
    return File(contents=b"".join(chunks))
def unpack(filepath=None, contents=None, password=None, filename=None, duplicates=None):
    """Unpacks the file or contents provided."""
    duplicates = [] if duplicates is None else duplicates
    f = (
        File(filepath, contents, filename=filename) if contents
        else File.from_path(filepath, filename=filename)
    )
    Unpacker.single(f, password, duplicates)
    ident(f)
    return f
def decrypt_blob(self, f): ret = [] # TODO Ensure that the assumption of "total size" being a 64-bit # integer is correct? for idx in xrange(0, struct.unpack("Q", f.read(8))[0], 0x1000): iv = self.get_hash(self.ei.key_data_salt + struct.pack("<I", idx), self.ei.key_data_hash_alg) aes = AES.new(self.secret_key, AES.MODE_CBC, iv[:16]) ret.append(aes.decrypt(f.read(0x1000))) return File(contents="".join(ret))
def test_pdf_is_embedded():
    """A PDF inside a zip is identified as such, and the document
    embedded within the PDF is discovered as its child."""
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as z:
        z.write("tests/files/pdf_docm.pdf")

    unpacked = list(ZipFile(File(contents=buf.getvalue())).unpack())
    assert len(unpacked) == 1
    pdf = unpacked[0]
    assert pdf.package == "pdf"
    assert len(pdf.children) == 1
    assert pdf.children[0].package == "doc"
def get_new_emails(db):
    """Fetch unseen e-mails over IMAP, unpack their zip attachments,
    submit files not seen before to Cuckoo, record them in the database,
    and mark each processed message as seen.

    NOTE(review): Python 2 style code — on Python 3 conn.search() returns
    bytes and splitting on " " (a str) would fail; verify before porting.
    """
    imaplib.IMAP4.debug = imaplib.IMAP4_SSL.debug = 1
    conn = imaplib.IMAP4_SSL(email_config.cuckoomx.get("server"))
    conn.login(email_config.cuckoomx.get("user"),
               email_config.cuckoomx.get("password"))
    conn.select("Inbox")
    (retcode, messages) = conn.search(None, "(UNSEEN)")
    if retcode == "OK" and messages:
        for num in messages[0].split(" "):
            if num:
                typ, data = conn.fetch(num,"(RFC822)")
                msg = email.message_from_string(data[0][1])
                if msg:
                    # Copy headers into a plain dict for easy access.
                    email_dict = dict()
                    email_dict["Attachments"] = list()
                    for k, v in msg.items():
                        email_dict[k] = v
                    if email_dict.get("Subject", ""):
                        print("[+] Procesing email with Subject: {0}".format(email_dict["Subject"]))
                    for part in msg.walk():
                        attachment = False
                        if part.get_filename():
                            filename = part.get_filename()
                            content_type = part.get_content_type()
                            attachment = part.get_payload(decode=True)
                            sha256 = hashlib.sha256(attachment).hexdigest()
                        if attachment:
                            #unpack it
                            z = ZipFile(File(
                                contents=attachment,
                                password=email_config.cuckoomx.get("archive_password")))
                            files = list(z.unpack(
                                password=email_config.cuckoomx.get("archive_password"),
                                duplicates=[]))
                            for file in files:
                                # Only submit files not seen before.
                                new_file = db.query(CUCKOOMX).filter(
                                    CUCKOOMX.sha256 == file.sha256).first()
                                if new_file is None:
                                    new_file = CUCKOOMX(sha256=file.sha256)
                                    temp_file_path = store_temp_file(
                                        file.contents, file.filename)
                                    task_id = main_db.add_path(
                                        file_path=temp_file_path
                                    )
                                    new_file.cuckoo_id = task_id
                                    new_file.email = email_dict.get("From", "")
                                    db.add(new_file)
                                    db.commit()
                                else:
                                    # Already known; just notify.
                                    send_notification(db, new_file)
                #mark as seen
                typ, data = conn.store(num,"+FLAGS","\Seen")
    conn.close()
    conn.logout()
def handles(self):
    """Report whether this file is handled here: either its filename has
    a matching extension, or its contents decompress as gzip into data
    with the expected magic."""
    if self.f.filename and self.f.filename.lower().endswith(self.exts):
        return True
    if not self.f.filesize:
        return False
    try:
        f = File(contents=gzip.GzipFile(fileobj=self.f.stream).read())
    except (IOError, EOFError):
        # gzip raises OSError/BadGzipFile for invalid data, but a
        # truncated stream raises EOFError, which previously escaped
        # and crashed this check.
        return False
    return self.magic in f.magic
def handles(self):
    """Report whether this file is handled here: either its filename has
    a matching extension, or its contents decompress as bzip2 into data
    with the expected magic."""
    if self.f.filename and self.f.filename.lower().endswith(self.exts):
        return True
    if not self.f.filesize:
        return False
    try:
        f = File(contents=bz2.decompress(self.f.contents))
    except (IOError, ValueError):
        # bz2.decompress raises OSError for invalid data, but a
        # truncated stream raises ValueError ("Compressed data ended
        # before the end-of-stream marker"), which previously escaped
        # and crashed this check.
        return False
    return self.magic in f.magic
def _bruteforce(self, archive, entry, passwords):
    """Try each candidate password against the zip entry.

    Returns a File (and remembers the password) on success, None when all
    candidates fail, and raises UnpackException for unrecognized errors.
    """
    for password in passwords:
        try:
            archive.setpassword(password)
            ret = File(relapath=entry.filename,
                       contents=archive.read(entry),
                       password=password)
            self.known_passwords.add(password)
            return ret
        except (RuntimeError, zipfile.BadZipfile) as e:
            # Python 3 exceptions have no .message attribute; the
            # original `e.message` raised AttributeError before the real
            # error could be inspected.
            msg = getattr(e, "message", None) or e.args[0]
            if "Bad password" not in msg and "Bad CRC-32" not in msg:
                raise UnpackException("Unknown zipfile error: %s" % e)
def test_identify():
    """Package identification for scripts, archives and malicious docs."""
    def check(path, package):
        assert identify(File(path)) == package

    check(b"tests/files/script.js", "js")
    check(b"tests/files/script.wsf", "wsf")
    check(b"tests/files/script.vbs", "vbs")
    check(b"tests/files/script.ps1", "ps1")

    with open("tests/files/sample.jar", "rb") as fp:
        assert unpack(contents=fp.read()).package == "jar"
    with open("tests/files/sample.apk", "rb") as fp:
        assert unpack(contents=fp.read()).package == "apk"

    check(b"tests/files/maldoc_office.htm", "doc")
    check(b"tests/files/maldoc.xls", "xls")
    check(b"tests/files/test.hta_", "hta")
def handles(self):
    """Report whether this file is handled here: either its filename has
    a matching extension, or its contents decompress as gzip into data
    with the expected magic."""
    # Guard against a missing filename, like the sibling gzip handler
    # does; the original crashed with AttributeError when filename was
    # None.
    if self.f.filename and self.f.filename.lower().endswith(self.exts):
        return True
    if not self.f.contents:
        return False
    try:
        contents = gzip.GzipFile(fileobj=io.BytesIO(self.f.contents))
        f = File(contents=contents.read())
    except (IOError, EOFError):
        # A truncated gzip stream raises EOFError rather than
        # OSError/BadGzipFile; catch both.
        return False
    return self.magic in f.magic
def unpack(filepath, contents=None, password=None):
    """Unpacks the file or contents provided."""
    f = File(filepath, contents) if contents else File.from_path(filepath)
    duplicates = []
    # The file may lack a telling extension (e.g., its filename is a
    # hash), so fall back to signature-based unpacker selection.
    unpacker = picker(filepath)
    if not unpacker and f.get_signature():
        unpacker = f.get_signature()["unpacker"]
    # Unpack any embedded files if a plugin supports this archive type.
    if unpacker:
        plugin = plugins[unpacker](f)
        if plugin.supported():
            f.children = plugin.unpack(password, duplicates)
    return f
def parse_ole10_native(self, ole, name):
    """Parse an Ole10Native stream and append the embedded file it
    carries to self.entries."""
    def parse_string(off):
        # Strings are NUL-terminated; returns (offset past the NUL, str).
        ret = stream[off:stream.find("\x00", off)]
        return off + len(ret) + 1, ret
    stream = self.get_stream(ole, "\x01Ole10Native")
    # Layout as parsed here: 6-byte header, then filename and filepath
    # strings, 8 skipped bytes, a temporary name string, and finally a
    # 32-bit payload length followed by the payload itself.
    off, filename = parse_string(6)
    off, filepath = parse_string(off)
    off, tempname = parse_string(off + 8)
    embed = struct.unpack("I", stream[off:off + 4])[0]
    self.entries.append(
        File(relapath=filename,
             contents=stream[off + 4:off + 4 + embed],
             selected=False))
def unpack(filepath=None, contents=None, password=None, filename=None, duplicates=None):
    """Unpacks the file or contents provided."""
    if contents:
        f = File(filepath, contents, filename=filename)
    else:
        f = File.from_path(filepath, filename=filename)
    duplicates = duplicates if duplicates is not None else []
    # Guessing inspects the contents as well, since the filename may be
    # a hash without any usable file extension.
    f.unpacker = Unpacker.guess(f)
    # Let the matching plugin unpack any embedded files.
    if f.unpacker:
        handler = plugins[f.unpacker](f)
        if handler.supported():
            f.children = handler.unpack(password, duplicates)
    return f
def unpack(self, duplicates=None):
    """Extract every regular file from the archive, marking entries whose
    sha256 has already been seen as duplicates."""
    if self.f.contents:
        archive = self._open_stream(self.f.contents, mode=self.mode)
    else:
        archive = self._open_path(self.f.filepath)
    duplicates = duplicates or []
    entries = []
    for member in archive:
        # Ignore anything that's not a file for now.
        if not member.isfile():
            continue
        f = File(member.path, archive.extractfile(member).read())
        if f.sha256 in duplicates:
            f.duplicate = True
        else:
            duplicates.append(f.sha256)
        entries.append(f)
    return self.process(entries, duplicates)
def f(filename):
    """Shorthand for loading a test fixture from tests/files/."""
    return File.from_path(f"tests/files/{filename}")