def zipjail(self, filepath, dirpath, *args):
    """Run ``self.exe`` through the bundled ``zipjail.elf`` wrapper.

    The wrapper is invoked as
    ``zipjail.elf <filepath> <dirpath> --clone=N -- <self.exe> <args...>``
    where ``N`` is 10 when ``self.name`` is ``"7zfile"`` and 0 otherwise.
    An already existing ``dirpath`` is removed before the process starts.
    The wrapper's stderr is then inspected for known messages.

    Returns:
        True when the process exits with code 0, False for any exit code
        other than 0 and 1 that did not match a known stderr message.

    Raises:
        UnpackException: size limit exceeded, directory traversal or
            symlink creation detected, or exit code 1.
        DecryptionFailedError: stderr mentions a wrong/missing password.
        NotSupportedError: stderr mentions an unknown lstat() errno.
    """
    zipjail = data_file("zipjail.elf")
    arg = "--clone=10" if self.name == "7zfile" else "--clone=0"

    if os.path.exists(dirpath):
        shutil.rmtree(dirpath)

    p = subprocess.Popen(
        (zipjail, filepath, dirpath, arg, "--", self.exe) + args,
        stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )

    # Drain the pipes *before* waiting for the exit status: calling wait()
    # first can deadlock once the child has filled a pipe buffer.
    _, err = p.communicate()
    return_code = p.returncode
    low_err = err.lower()

    if b"excessive writing caused incomplete unpacking!" in low_err:
        raise UnpackException(
            "Cancelled: unpacked archive exceeds maximum size",
            Errors.TOTAL_TOO_LARGE
        )

    # All needles must be lowercase because they are matched against the
    # lower-cased stderr output.
    traversal_markers = (
        b"detected potential out-of-path arbitrary overwrite",
        b"detected potential directory traversal arbitrary overwrite",
    )
    if any(x in low_err for x in traversal_markers):
        raise UnpackException(
            "Cancelled: directory traversal attempt detected",
            Errors.CANCELLED_DIR_TRAVERSAL
        )

    blocked_symlink = (
        b"blocked system call" in low_err
        and b"syscall=symlink" in low_err
    )
    if blocked_symlink or \
            b"potential symlink-based arbitrary overwrite" in low_err:
        raise UnpackException(
            "Cancelled: symlink creation attempt detected",
            Errors.CANCELLED_SYMLINK
        )

    password_markers = (
        b"wrong password", b"bad password", b"password is incorrect",
        b"password required",
    )
    if any(x in low_err for x in password_markers):
        raise DecryptionFailedError(
            "No correct password for encrypted archive"
        )

    if b"unknown lstat() errno" in low_err:
        # Handle unknown lstat errors as if the unpacking tool does
        # not support the current file and allow another unpacker to
        # be chosen.
        raise NotSupportedError(f"Zipjail error: {err}")

    if return_code == 1:
        raise UnpackException(f"Zipjail error: {err}", Errors.ZIPJAIL_FAIL)

    return not return_code
def locate_ole(self, contents):
    """Find a zlib stream near the start of ``contents`` and open it as OLE.

    Each of the first 1024 byte offsets is tried as the start of a zlib
    stream; the first offset that decompresses successfully wins.

    Returns:
        An ``olefile.OleFileIO`` built from the decompressed data.

    Raises:
        UnpackException: no offset decompressed, or olefile raised IOError
            while opening the decompressed data.
    """
    # range() rather than xrange(): 1024 items is trivial and range exists
    # on every Python version.
    for idx in range(1024):
        try:
            obj = zlib.decompress(contents[idx:])
            break
        except zlib.error:
            # Not a valid stream at this offset; keep scanning. A bare
            # except here would also swallow KeyboardInterrupt/SystemExit.
            continue
    else:
        raise UnpackException("GZIP stream not found")

    try:
        return olefile.OleFileIO(io.BytesIO(obj))
    except IOError as e:
        raise UnpackException(e)
def unpack(self, password=None, duplicates=None):
    """Extract ``self.f`` into a temporary directory via ``self.zipjail``.

    When ``self.f.filepath`` is not set, the content is first written to a
    temporary ``.7z`` file obtained from ``self.f.temp_path``; that file is
    removed again whether or not extraction succeeds.

    Returns:
        ``[]`` when ``self.zipjail`` reports failure, otherwise the result
        of ``self.process_directory`` on the extraction directory.

    Raises:
        UnpackException: a password was supplied (not supported).
    """
    # Reject unsupported input before allocating anything on disk so that
    # no temporary directory is left behind.
    if password:
        raise UnpackException(
            "Currently password-protected .7z files are not supported "
            "due to a ZipJail-related monitoring issue (namely, due to "
            "7z calling clone(2) when a password has been provided)."
        )

    if self.f.filepath:
        filepath = self.f.filepath
        temporary = False
    else:
        filepath = self.f.temp_path(b".7z")
        temporary = True

    try:
        dirpath = tempfile.mkdtemp()
        try:
            ret = self.zipjail(
                filepath, dirpath, "x", "-mmt=off", "-o%s" % dirpath,
                filepath
            )
        except Exception:
            # Do not leave a partially extracted tree behind.
            shutil.rmtree(dirpath, ignore_errors=True)
            raise
    finally:
        if temporary:
            os.unlink(filepath)

    if not ret:
        shutil.rmtree(dirpath, ignore_errors=True)
        return []

    return self.process_directory(dirpath, duplicates)
def decrypt(self, password, archive, entry):
    """Try to read ``entry`` from ``archive`` using ``password``.

    On Python 3 a ``str`` entry filename is replaced on the entry by its
    encoded bytes form before the ``File`` is built.

    Returns:
        A ``File`` on success, or ``None`` when reading fails with one of
        the known zipfile error messages listed below.

    Raises:
        UnpackException: reading failed with an unrecognised message.
    """
    # Error message fragments for which the entry is silently given up on.
    ignorable = (
        "Bad password",
        "Bad CRC-32",
        "password required",
        "Truncated file header",
        "invalid distance too far back",
        "cannot fit 'long' into",
        "Bad magic number for",
    )

    try:
        archive.setpassword(password)
        needs_encoding = six.PY3 and isinstance(entry.filename, str)
        if needs_encoding:
            entry.filename = entry.filename.encode()
        return File(
            relapath=entry.filename,
            contents=archive.read(entry),
            password=password
        )
    except (RuntimeError, zipfile.BadZipfile, OverflowError,
            zlib.error) as err:
        reason = getattr(err, "message", None) or err.args[0]
        if any(fragment in reason for fragment in ignorable):
            return None
        raise UnpackException("Unknown zipfile error: %s" % err)
def process_directory(self, dirpath, duplicates, depth, password=None):
    """Enumerates a directory, removes the directory, and returns data
    after calling the process function.

    Every regular file found under ``dirpath`` is read into a ``File``
    whose ``relapath`` is the path relative to ``dirpath``.

    Returns:
        The result of ``self.process(entries, duplicates, depth)``.

    Raises:
        UnpackException: ``dirpath`` contains no entries at all.
    """
    entries = []
    if duplicates is None:
        duplicates = []

    if self.f:
        self.f.clear_error()

    try:
        if not os.listdir(dirpath):
            raise UnpackException(
                "Extraction directory was empty", Errors.NOTHING_EXTRACTED
            )

        for root, _, filenames in os.walk(dirpath):
            for filename in filenames:
                filepath = os.path.join(root, filename)
                # Close the handle deterministically instead of relying
                # on garbage collection.
                with open(filepath, "rb") as fp:
                    contents = fp.read()
                # relpath() stays correct even if dirpath carries a
                # trailing separator, unlike slicing by len(dirpath) + 1.
                entries.append(File(
                    relapath=os.path.relpath(filepath, dirpath),
                    password=password,
                    contents=contents
                ))
    except Exception:
        # The directory must not outlive a failed enumeration.
        shutil.rmtree(dirpath, ignore_errors=True)
        raise

    shutil.rmtree(dirpath)
    return self.process(entries, duplicates, depth)
def unpack(self, password=None, duplicates=None):
    """Extract ``self.f`` into a temporary directory with ``self.exe``.

    The extractor is launched through the ``self.zipjail`` executable.
    A non-zero exit status does not abort: ``self.f.mode`` is set to
    ``"failed"``, the error is stored on ``self.f.error`` and whatever was
    extracted is still handed to ``self.process_directory``.

    When ``self.f.filepath`` is not set, the content is first written to a
    temporary ``.7z`` file which is always removed afterwards.

    Raises:
        UnpackException: a password was supplied (not supported).
    """
    # Reject unsupported input before creating the temporary directory so
    # that it is not leaked.
    if password:
        raise UnpackException(
            "Currently password-protected .7z files are not supported "
            "due to a ZipJail-related monitoring issue (namely, due to "
            "7z calling clone(2) when a password has been provided)."
        )

    if self.f.filepath:
        filepath = self.f.filepath
        temporary = False
    else:
        filepath = self.f.temp_path(".7z")
        temporary = True

    try:
        dirpath = tempfile.mkdtemp()
        subprocess.check_output([
            self.zipjail, filepath, dirpath,
            self.exe, "x", "-mmt=off", "-o%s" % dirpath, filepath,
        ])
    except subprocess.CalledProcessError as e:
        self.f.mode = "failed"
        self.f.error = e
    finally:
        # Also runs when launching the process itself fails.
        if temporary:
            os.unlink(filepath)

    return self.process_directory(dirpath, duplicates)
def decrypt(self, password, archive, entry):
    """Try to read ``entry`` from ``archive`` using ``password``.

    Returns:
        A ``File`` on success, or ``None`` when reading fails with one of
        the known zipfile error messages listed below.

    Raises:
        UnpackException: reading failed with an unrecognised message.
    """
    # Error message fragments for which the entry is silently given up on.
    ignorable = (
        "Bad password",
        "Bad CRC-32",
        "password required",
        "Truncated file header",
        "invalid distance too far back",
        "cannot fit 'long' into",
    )

    try:
        archive.setpassword(password)
        contents = archive.read(entry)
        return File(
            relapath=entry.filename,
            contents=contents,
            password=password
        )
    except (RuntimeError, zipfile.BadZipfile, OverflowError,
            zlib.error) as e:
        # Exceptions have no ``message`` attribute on Python 3 and may be
        # raised without arguments, so neither can be accessed blindly.
        msg = getattr(e, "message", None) or (
            e.args[0] if e.args else str(e)
        )
        if any(fragment in msg for fragment in ignorable):
            return None
        raise UnpackException("Unknown zipfile error: %s" % e)
def unpack(self, duplicates=None):
    """Extract ``self.f.filepath`` into a temporary directory.

    ``self.exe`` is launched through the ``self.zipjail`` executable with
    the arguments ``x -mt1 <filepath> <dirpath>``.

    Returns:
        The result of ``self.process_directory`` on the extraction
        directory.

    Raises:
        UnpackException: the subprocess exited with a non-zero status.
    """
    dirpath = tempfile.mkdtemp()

    try:
        subprocess.check_call([
            self.zipjail, self.f.filepath, dirpath,
            self.exe, "x", "-mt1", self.f.filepath, dirpath,
        ])
    except subprocess.CalledProcessError as e:
        # Nobody will consume the directory after a failure, so remove it
        # here instead of leaking it.
        shutil.rmtree(dirpath, ignore_errors=True)
        raise UnpackException(e)

    return self.process_directory(dirpath, duplicates)
def locate_ole(self, contents):
    """Find a zlib stream near the start of ``contents`` and return its OLE.

    Each of the first 1024 byte offsets is tried as the start of a zlib
    stream; the first offset that decompresses successfully wins. The
    decompressed data is wrapped in a ``File`` and its ``ole`` attribute
    is returned after ``raise_no_ole`` has been called on it.

    Raises:
        UnpackException: no offset decompressed successfully.
    """
    # range() rather than xrange(): 1024 items is trivial and range exists
    # on every Python version.
    for idx in range(1024):
        try:
            obj = zlib.decompress(contents[idx:])
            break
        except zlib.error:
            # Not a valid stream at this offset; keep scanning. A bare
            # except here would also swallow KeyboardInterrupt/SystemExit.
            continue
    else:
        raise UnpackException("GZIP stream not found")

    f = File(contents=obj)
    f.raise_no_ole("No OLE file found in MSO")
    return f.ole
def _bruteforce(self, archive, entry, passwords):
    """Try each of ``passwords`` on ``entry`` until one can read it.

    A password that works is added to ``self.known_passwords``.

    Returns:
        A ``File`` for the first password that works, or ``None`` when
        every candidate fails with a bad-password or bad-CRC error.

    Raises:
        UnpackException: reading failed with an unrecognised message.
    """
    for password in passwords:
        try:
            archive.setpassword(password)
            contents = archive.read(entry)
            ret = File(relapath=entry.filename, contents=contents,
                       password=password)
            self.known_passwords.add(password)
            return ret
        except (RuntimeError, zipfile.BadZipfile) as e:
            # Exceptions have no ``message`` attribute on Python 3 and may
            # be raised without arguments.
            msg = getattr(e, "message", None) or (
                e.args[0] if e.args else str(e)
            )
            if "Bad password" not in msg and "Bad CRC-32" not in msg:
                raise UnpackException("Unknown zipfile error: %s" % e)
    return None
def decrypt(self, password, archive, entry):
    """Read ``entry`` from ``archive``, optionally using ``password``.

    Leading ``/`` characters are stripped from the entry filename before
    it is used as the relative path of the returned ``File``.

    Raises:
        InvalidZipEntryError: negative header offset, empty filename after
            stripping, or a read error recognised as a broken entry.
        DecryptionFailedError: the password is wrong or missing.
        NotSupportedError: the entry uses a compression or encryption
            scheme this implementation cannot handle.
        UnpackException: any other recognised exception type with an
            unknown message.
    """
    if entry.header_offset < 0:
        raise InvalidZipEntryError(
            "Negative header offset, cannot unpack this file"
        )

    try:
        if password:
            archive.setpassword(password.encode())

        relapath = entry.filename.lstrip("/")
        if not relapath:
            raise InvalidZipEntryError(
                "Filename empty after stripping absolute path"
            )

        return File(
            relapath=relapath,
            contents=archive.read(entry),
            password=password
        )
    except (RuntimeError, zipfile.BadZipFile, OverflowError, zlib.error,
            UnicodeDecodeError) as e:
        reason = str(e)

        def mentions(*fragments):
            # True when the error text contains any of the fragments.
            return any(fragment in reason for fragment in fragments)

        if mentions("Bad password", "password required"):
            raise DecryptionFailedError(
                "No correct password for encrypted archive"
            )

        if mentions("compression method is not supported",
                    "compression type 99",
                    "strong encryption (flag bit 6)"):
            raise NotSupportedError(
                "7z is required to unpack this ZIP archive"
            )

        # Errors that only invalidate this single entry.
        if mentions("Bad CRC-32",
                    "Truncated file header",
                    "invalid distance too far back",
                    "cannot fit 'long' into",
                    "Bad magic number for",
                    "'utf-8' codec can't decode byte",
                    "invalid stored block lengths"):
            raise InvalidZipEntryError(reason)

        raise UnpackException(f"Unknown zipfile error: {e}")
def _decrypt(self, archive, entry, password):
    """Read ``entry`` with ``password``, brute-forcing when that fails.

    Returns:
        The ``File`` read with ``password``; otherwise the first truthy
        result of brute-forcing with ``self.known_passwords`` and then
        ``iter_passwords()``; otherwise a ``File`` in ``"failed"`` mode.

    Raises:
        UnpackException: the initial read raised a RuntimeError that is
            not about a required or bad password.
    """
    try:
        archive.setpassword(password)
        return File(entry.filename, archive.read(entry), password=password)
    except RuntimeError as e:
        reason = e.args[0]
        password_problem = (
            "password required" in reason or "Bad password" in reason
        )
        if not password_problem:
            raise UnpackException("Unknown zipfile error: %s" % e)

    # Bruteforce the password. First try all passwords that are known to
    # work and if that fails try our entire dictionary.
    found = self._bruteforce(archive, entry, self.known_passwords)
    if not found:
        found = self._bruteforce(archive, entry, iter_passwords())
    if not found:
        found = File(entry.filename, None, mode="failed",
                     description="Error decrypting file")
    return found
def raise_no_ole(self, message):
    """Raise ``UnpackException(message)`` when ``self.ole`` is ``None``.

    Does nothing (and returns ``None``) otherwise.
    """
    if self.ole is not None:
        return
    raise UnpackException(message)
def unpack(self, depth=0, password=None, duplicates=None):
    """Unpack every file entry of the ZIP archive in ``self.f.stream``.

    Entries whose name ends in ``/``, is blank, or whose size is negative
    are skipped. Entries that raise ``InvalidZipEntryError`` are kept as
    ``File`` objects in ``"failed"`` mode with an error message.

    Returns:
        ``[]`` when the archive cannot be opened or the accumulated
        uncompressed size reaches ``MAX_TOTAL_SIZE`` (the error is
        recorded on ``self.f``); otherwise the result of
        ``self.process(entries, duplicates, depth)``.

    Raises:
        UnpackException: an entry name contains ``..``, ``:`` or a NUL
            character, or an entry is a symlink.
    """
    self.f.archive = True

    try:
        archive = zipfile.ZipFile(self.f.stream)
    except (zipfile.BadZipFile, IOError) as e:
        self.f.set_error(Errors.INVALID_ARCHIVE, str(e))
        return []

    entries, total_size = [], 0
    # A set: only membership is ever tested, and lookups stay O(1).
    directories = set()
    illegal = ("..", ":", "\x00")

    for entry in archive.infolist():
        if entry.filename.endswith("/") or entry.file_size < 0:
            continue

        # TODO We should likely move this to self.process(), assuming
        # this is also an issue with other archive formats.
        if not entry.filename.strip():
            continue

        if any(c in entry.filename for c in illegal):
            raise UnpackException(
                "Illegal character(s) in file path",
                Errors.CANCELLED_DIR_TRAVERSAL
            )

        # The upper 16 bits of external_attr carry the Unix file mode.
        if stat.S_ISLNK(entry.external_attr >> 16):
            raise UnpackException(
                "Cancelled: symlink creation attempt detected",
                Errors.CANCELLED_SYMLINK
            )

        # TODO Improve this. Also take precedence for native decompression
        # utilities over the Python implementation in the future.
        total_size += entry.file_size
        if total_size >= MAX_TOTAL_SIZE:
            self.f.set_error(
                Errors.TOTAL_TOO_LARGE,
                f"Unpacked archive size exceeds maximum of: "
                f"{MAX_TOTAL_SIZE}"
            )
            return []

        try:
            f = self.bruteforce(password, archive, entry)
        # We stop unpacking if decryption of one entry failed and
        # we have tried all passwords.
        except InvalidZipEntryError as e:
            # We do not stop unpacking if an entry in the archive is
            # invalid. Mark the invalid entry and continue.
            f = File(relapath=entry.filename, mode="failed")
            f.error = f"Could not unpack: {e}"

        entries.append(f)
        if f.relaname:
            directories.add(os.path.dirname(f.relaname))

    # This fixes an issue when a directory name is identified as "foo"
    # instead of "foo/" as required by zipfile (and likely the majority
    # of other .zip implementations). The issue being "foo" being created
    # as an empty file rather than a directory.
    # TODO We should likely move this to self.process(), assuming this
    # is also an issue with other archive formats.
    #
    # Build a filtered list rather than popping by index: indices taken
    # from a snapshot go stale after the first removal and would delete
    # the wrong entries.
    entries = [
        entry for entry in entries if entry.relaname not in directories
    ]

    return self.process(entries, duplicates, depth)
def unpack(self, depth=0, password=None, duplicates=None):
    """Extract embedded file attachments from a PDF using peepdf.

    Every version of the parsed document is scanned for dictionary
    objects that have an ``/F`` or ``/UF`` name and an ``/EF`` dictionary
    whose ``/F`` element references an object of the same version. The
    referenced object's decoded stream becomes the attachment contents.

    When ``self.f.filepath`` is not set, the content is first written to a
    temporary file which is always removed afterwards.

    Returns:
        The result of ``self.process(entries, duplicates, depth)``.

    Raises:
        UnpackException: peepdf failed to parse the file.
    """
    entries = []

    if self.f.filepath:
        filepath = self.f.filepath
        temporary = False
    else:
        filepath = self.f.temp_path()
        temporary = True

    try:
        p = peepdf.PDFCore.PDFParser()

        try:
            _, f = p.parse(
                filepath, forceMode=True,
                looseMode=True, manualAnalysis=False
            )
        except Exception as e:
            raise UnpackException(f"peepdf parsing failure: {e}") from e

        for version in range(f.updates + 1):
            objects = f.body[version].objects
            for obj in objects.values():
                if not isinstance(obj.object, peepdf.PDFCore.PDFDictionary):
                    continue

                el = obj.object.elements
                if "/F" not in el and "/UF" not in el:
                    continue
                if "/EF" not in el:
                    continue

                filename = el.get("/F") or el.get("/UF")
                if not isinstance(filename, peepdf.PDFCore.PDFString):
                    continue

                ref = el["/EF"]
                if not isinstance(ref, peepdf.PDFCore.PDFDictionary):
                    continue
                if "/F" not in ref.elements:
                    continue

                ref = ref.elements["/F"]
                if not isinstance(ref, peepdf.PDFCore.PDFReference):
                    continue
                if ref.id not in objects:
                    continue

                embedded = objects[ref.id]
                contents = embedded.object.decodedStream.encode("latin-1")
                filename = filename.value

                entries.append(File(
                    relapath=filename, contents=contents,
                    filename=filename, selected=False
                ))
    finally:
        # Remove the temporary copy even when parsing or extraction fails.
        if temporary:
            os.unlink(filepath)

    return self.process(entries, duplicates, depth)