def test_file_seems_encrypted():
    """Check that the bytes stored on disk do not contain the plaintext."""
    f = SecureTemporaryFile('/tmp')
    f.write(MESSAGE)
    with io.open(f.filepath, 'rb') as fh:
        contents = fh.read()

    assert MESSAGE.encode('utf-8') not in contents
    # The on-disk bytes are arbitrary binary data, so a strict decode could
    # raise UnicodeDecodeError before the assertion is evaluated. Replacing
    # undecodable bytes keeps any valid UTF-8 run intact, so the plaintext
    # would still be detected if it were present.
    assert MESSAGE not in contents.decode('utf-8', 'replace')
def test_buffered_read():
    """Read a large payload back in 1024-sized requests and compare it with
    what was written."""
    tmp = SecureTemporaryFile('/tmp')
    expected = MESSAGE * 1000
    tmp.write(expected)

    # Keep requesting chunks until read() hands back something empty.
    pieces = []
    piece = tmp.read(1024)
    while piece:
        pieces.append(piece)
        piece = tmp.read(1024)

    assert b''.join(pieces).decode('utf-8') == expected
def test_write_then_read_then_write():
    """Check that writing after a read raises AssertionError with the
    expected message."""
    f = SecureTemporaryFile('/tmp')
    f.write(MESSAGE)
    f.read()

    with pytest.raises(AssertionError) as err:
        f.write('be gentle to each other so we can be dangerous together')
    # `err` is pytest's ExceptionInfo wrapper; the raised exception itself is
    # `err.value`, so check the message there rather than in the wrapper's
    # own string form.
    assert 'You cannot write after reading!' in str(err.value)
def save_file_submission(filesystem_id, count, journalist_filename, filename,
                         stream):
    """Gzip an uploaded stream, encrypt it, and return the stored file name.

    The stream is compressed into a SecureTemporaryFile, which is then passed
    to `crypto_util.encrypt` together with `config.JOURNALIST_KEY` and the
    destination path built from `filesystem_id` and the encrypted file name.

    :param filesystem_id: first argument handed to `path()` when building
        the destination path.
    :param count: number used as the prefix of the encrypted file name.
    :param journalist_filename: name used as the middle part of the encrypted
        file name.
    :param filename: original upload name, recorded (sanitized) inside the
        gzip header; may be None, in which case a placeholder is used.
    :param stream: object whose `read(size)` yields the submitted data.
    :returns: the encrypted file name, "<count>-<journalist_filename>-doc.gz.gpg".
    """
    # A missing filename would make secure_filename() fail; fall back to a
    # neutral placeholder instead.
    if filename is not None:
        sanitized_filename = secure_filename(filename)
    else:
        sanitized_filename = secure_filename("unknown.file")

    # We store file submissions in a .gz file for two reasons:
    #
    # 1. Downloading large files over Tor is very slow. If we can
    # compress the file, we can speed up future downloads.
    #
    # 2. We want to record the original filename because it might be
    # useful, either for context about the content of the submission
    # or for figuring out which application should be used to open
    # it. However, we'd like to encrypt that info and have the
    # decrypted file automatically have the name of the original
    # file. Given various usability constraints in GPG and Tails, this
    # is the most user-friendly way we have found to do this.

    encrypted_file_name = "{0}-{1}-doc.gz.gpg".format(
        count,
        journalist_filename)
    encrypted_file_path = path(filesystem_id, encrypted_file_name)
    with SecureTemporaryFile("/tmp") as stf:
        # mtime=0 keeps the current time out of the gzip header; by default
        # GzipFile records the time of compression there.
        with gzip.GzipFile(filename=sanitized_filename,
                           mode='wb', fileobj=stf, mtime=0) as gzf:
            # Buffer the stream into the gzip file to avoid excessive
            # memory consumption
            while True:
                buf = stream.read(1024 * 8)
                if not buf:
                    break
                gzf.write(buf)

        # The gzip member is complete once its `with` block has exited, but
        # the temporary file is still open for the encryption step.
        crypto_util.encrypt(stf, config.JOURNALIST_KEY, encrypted_file_path)

    return encrypted_file_name
def test_tmp_file_id_omits_invalid_chars():
    """Check that `SecureTemporaryFile.tmp_file_id` has no '/' or NUL.

    The attribute is used as the file name of the secure temporary file, so
    neither a path separator nor a null character may appear in it.
    """
    tmp = SecureTemporaryFile('/tmp')
    for forbidden in ('/', '\0'):
        assert forbidden not in tmp.tmp_file_id
def save_file_submission(
    self,
    filesystem_id: str,
    count: int,
    journalist_filename: str,
    filename: typing.Optional[str],
    stream: "IO[bytes]",
) -> str:
    """Compress an uploaded stream, encrypt it, and return the stored name.

    The data read from `stream` is gzipped into a SecureTemporaryFile, and
    that temporary file is then handed to the default EncryptionManager's
    `encrypt_source_file`, which is given the destination path.

    :param filesystem_id: first argument passed to `self.path()` when the
        destination path is built.
    :param count: number used as the prefix of the encrypted file name.
    :param journalist_filename: middle component of the encrypted file name.
    :param filename: original upload name, or None; "unknown.file" is used
        in its place when it is None.
    :param stream: binary stream providing the submitted data.
    :returns: the encrypted file name, "<count>-<journalist_filename>-doc.gz.gpg".
    """
    original_name = filename if filename is not None else "unknown.file"
    sanitized_filename = secure_filename(original_name)

    # Submissions are stored gzipped for two reasons:
    #
    # 1. Downloads over Tor are very slow, so compressing the file speeds up
    #    later downloads.
    #
    # 2. The original filename is worth keeping, both as context for the
    #    submission and as a hint about which application opens it. That
    #    name should be encrypted too, and the decrypted file should end up
    #    carrying it automatically. Given the usability constraints of GPG
    #    and Tails, the gzip header is the most user-friendly place found
    #    for it.
    encrypted_file_name = "{0}-{1}-doc.gz.gpg".format(count, journalist_filename)
    encrypted_file_path = self.path(filesystem_id, encrypted_file_name)

    chunk_size = 1024 * 8
    with SecureTemporaryFile("/tmp") as stf:  # nosec
        with gzip.GzipFile(
            filename=sanitized_filename, mode="wb", fileobj=stf, mtime=0
        ) as gzf:
            # Copy chunk by chunk so the whole upload is never held in
            # memory at once.
            chunk = stream.read(chunk_size)
            while chunk:
                gzf.write(chunk)
                chunk = stream.read(chunk_size)

        # The gzip writer is closed here while the temporary file is still
        # open for the encryption step.
        destination = Path(encrypted_file_path)
        EncryptionManager.get_default().encrypt_source_file(
            file_in=stf,
            encrypted_file_path_out=destination,
        )

    return encrypted_file_name
def _secure_file_stream(self, total_content_length, content_type, filename=None, content_length=None): """Storage class for data streamed in from requests. If the data is relatively small (512KB), just store it in memory. Otherwise, use the SecureTemporaryFile class to buffer it on disk, encrypted with an ephemeral key to mitigate forensic recovery of the plaintext. """ if total_content_length > 1024 * 512: # We don't use `config.TEMP_DIR` here because that # directory is exposed via X-Send-File and there is no # reason for these files to be publicly accessible. See # note in `config.py` for more info. Instead, we just use # `/tmp`, which has the additional benefit of being # automatically cleared on reboot. return SecureTemporaryFile('/tmp') # nosec return BytesIO()
def test_SecureTemporaryFile_is_a_STREAMLIKE_TYPE():
    """Check that `_is_stream` accepts a SecureTemporaryFile instance."""
    candidate = SecureTemporaryFile('/tmp')
    assert _is_stream(candidate)
def test_file_is_removed_from_disk():
    """Check that the backing file exists until close() and is gone after.

    Two passes are made: the first closes without reading the contents, the
    second reads the contents before closing.
    """
    for read_before_close in (False, True):
        tmp = SecureTemporaryFile('/tmp')
        tmp.write(MESSAGE)
        if read_before_close:
            tmp.read()

        assert os.path.exists(tmp.filepath)
        tmp.close()
        assert not os.path.exists(tmp.filepath)
def test_read_write_unicode():
    """Check that non-ASCII text survives a write followed by a read."""
    unicode_msg = '鬼神 Kill Em All 1989'
    tmp = SecureTemporaryFile('/tmp')
    tmp.write(unicode_msg)

    round_tripped = tmp.read().decode('utf-8')
    assert round_tripped == unicode_msg
def test_write_then_read_twice():
    """Check that the first read returns the message and the second read
    returns empty bytes."""
    tmp = SecureTemporaryFile('/tmp')
    tmp.write(MESSAGE)

    first_read = tmp.read()
    assert first_read.decode('utf-8') == MESSAGE

    second_read = tmp.read()
    assert second_read == b''
def test_read_before_writing():
    """Check that reading before any write raises AssertionError with the
    expected message."""
    f = SecureTemporaryFile('/tmp')

    with pytest.raises(AssertionError) as err:
        f.read()
    # `err` is pytest's ExceptionInfo wrapper; the raised exception itself is
    # `err.value`, so check the message there rather than in the wrapper's
    # own string form.
    assert 'You must write before reading!' in str(err.value)
def test_read_write_unicode():
    """Check that non-ASCII text survives a write followed by a read."""
    unicode_msg = "鬼神 Kill Em All 1989"
    tmp = SecureTemporaryFile("/tmp")
    tmp.write(unicode_msg)

    round_tripped = tmp.read().decode("utf-8")
    assert round_tripped == unicode_msg
def test_write_then_read_twice():
    """Check that the first read returns the message and the second read
    returns empty bytes."""
    tmp = SecureTemporaryFile("/tmp")
    tmp.write(MESSAGE)

    first_read = tmp.read()
    assert first_read.decode("utf-8") == MESSAGE

    second_read = tmp.read()
    assert second_read == b""