def test_file_is_removed_from_disk():
    """The backing file exists after a write and is gone once closed.

    The scenario is run twice: once closing straight after the write, and
    once with a read between the write and the close.
    """
    for read_before_close in (False, True):
        tmp = SecureTemporaryFile('/tmp')
        tmp.write(MESSAGE)
        if read_before_close:
            tmp.read()

        assert os.path.exists(tmp.filepath)
        tmp.close()
        assert not os.path.exists(tmp.filepath)
Example #2
0
def save_file_submission(filesystem_id, count, journalist_filename, filename,
                         stream):
    """Gzip and encrypt an uploaded file and return the stored file's name.

    The upload is read from ``stream`` in 8 KiB chunks, gzip-compressed into
    a ``SecureTemporaryFile`` and then handed to ``crypto_util.encrypt``
    together with ``config.JOURNALIST_KEY`` and the output path
    ``path(filesystem_id, "<count>-<journalist_filename>-doc.gz.gpg")``.

    :param filesystem_id: passed to ``path()`` to locate the output file.
    :param count: value used as the prefix of the encrypted file name.
    :param journalist_filename: middle component of the encrypted file name.
    :param filename: original name of the upload, recorded (after
        ``secure_filename``) in the gzip header. May be ``None``, in which
        case ``"unknown.file"`` is recorded instead.
    :param stream: object whose ``read(size)`` yields the upload's bytes and
        a falsy value at end of input.
    :returns: the encrypted file's name (not its full path).
    """
    # An upload may arrive without a filename; fall back to a placeholder
    # rather than passing None to secure_filename().
    if filename is None:
        filename = "unknown.file"
    sanitized_filename = secure_filename(filename)

    # We store file submissions in a .gz file for two reasons:
    #
    # 1. Downloading large files over Tor is very slow. If we can
    # compress the file, we can speed up future downloads.
    #
    # 2. We want to record the original filename because it might be
    # useful, either for context about the content of the submission
    # or for figuring out which application should be used to open
    # it. However, we'd like to encrypt that info and have the
    # decrypted file automatically have the name of the original
    # file. Given various usability constraints in GPG and Tails, this
    # is the most user-friendly way we have found to do this.

    encrypted_file_name = "{0}-{1}-doc.gz.gpg".format(
        count,
        journalist_filename)
    encrypted_file_path = path(filesystem_id, encrypted_file_name)
    with SecureTemporaryFile("/tmp") as stf:
        # mtime=0 pins the timestamp field of the gzip header so the archive
        # does not record when the submission was compressed.
        with gzip.GzipFile(filename=sanitized_filename,
                           mode='wb', fileobj=stf, mtime=0) as gzf:
            # Buffer the stream into the gzip file to avoid excessive
            # memory consumption
            while True:
                buf = stream.read(1024 * 8)
                if not buf:
                    break
                gzf.write(buf)

        crypto_util.encrypt(stf, config.JOURNALIST_KEY, encrypted_file_path)

    return encrypted_file_name
def test_tmp_file_id_omits_invalid_chars():
    """`SecureTemporaryFile.tmp_file_id` is used as the filename of the
    secure temporary file, so it must not contain characters that are
    invalid in a filename: the path separator and the NUL character."""
    tmp = SecureTemporaryFile('/tmp')
    for forbidden in ('/', '\0'):
        assert forbidden not in tmp.tmp_file_id
def test_file_seems_encrypted():
    """The bytes stored at ``filepath`` must not contain the plaintext."""
    f = SecureTemporaryFile('/tmp')
    f.write(MESSAGE)
    # NOTE(review): nothing here flushes `f` before the file is re-opened; if
    # writes are buffered, `contents` may not yet hold what was written --
    # confirm against SecureTemporaryFile.
    with io.open(f.filepath, 'rb') as fh:
        contents = fh.read()

    assert MESSAGE.encode('utf-8') not in contents
    # The on-disk bytes need not be valid UTF-8. Decode leniently so this
    # check is evaluated instead of dying with UnicodeDecodeError.
    assert MESSAGE not in contents.decode('utf-8', errors='replace')
def test_write_then_read_then_write():
    """Writing again after a read must fail with an AssertionError."""
    f = SecureTemporaryFile('/tmp')
    f.write(MESSAGE)
    f.read()

    with pytest.raises(AssertionError) as err:
        f.write('be gentle to each other so we can be dangerous together')
    # Check the raised exception itself: str() of the ExceptionInfo wrapper
    # is a repr whose format is owned by pytest, not by the code under test.
    assert 'You cannot write after reading!' in str(err.value)
def test_buffered_read():
    """Reading back in 1024-byte requests reassembles the written text."""
    tmp = SecureTemporaryFile('/tmp')
    payload = MESSAGE * 1000
    tmp.write(payload)

    # Keep requesting chunks until read() returns something falsy.
    pieces = []
    piece = tmp.read(1024)
    while piece:
        pieces.append(piece)
        piece = tmp.read(1024)

    assert b''.join(pieces).decode('utf-8') == payload
Example #7
0
    def save_file_submission(
        self,
        filesystem_id: str,
        count: int,
        journalist_filename: str,
        filename: typing.Optional[str],
        stream: "IO[bytes]",
    ) -> str:
        """Compress and encrypt an uploaded file; return the stored name.

        The upload is read from ``stream`` in 8 KiB chunks and gzip-compressed
        into a ``SecureTemporaryFile``. That file is then passed to the
        default ``EncryptionManager``, which is asked to write the encrypted
        result to ``self.path(filesystem_id, <encrypted file name>)``.

        :param filesystem_id: passed to ``self.path()`` to locate the output.
        :param count: value used as the prefix of the encrypted file name.
        :param journalist_filename: middle component of the encrypted name.
        :param filename: original name of the upload, or ``None``; a
            placeholder name is used when it is ``None``.
        :param stream: source of the upload's bytes.
        :returns: the encrypted file's name (not its full path).
        """
        # Uploads without a filename are recorded under a placeholder name.
        original_name = "unknown.file" if filename is None else filename
        sanitized_filename = secure_filename(original_name)

        # Submissions are stored gzip-compressed for two reasons:
        #
        # 1. Downloads over Tor are very slow, so a smaller file speeds up
        #    every later download.
        #
        # 2. The original filename is worth keeping, both as context for the
        #    submission and as a hint about which application opens it. It
        #    should be encrypted along with the content, and the decrypted
        #    file should automatically carry the original name. Given
        #    various usability constraints in GPG and Tails, the gzip header
        #    is the most user-friendly place found to record it.

        encrypted_file_name = "{0}-{1}-doc.gz.gpg".format(count, journalist_filename)
        encrypted_file_path = self.path(filesystem_id, encrypted_file_name)
        chunk_size = 1024 * 8

        with SecureTemporaryFile("/tmp") as stf:  # nosec
            with gzip.GzipFile(
                filename=sanitized_filename,
                mode="wb",
                fileobj=stf,
                mtime=0,
            ) as gzip_writer:
                # Copy chunk by chunk so the whole upload is never held in
                # memory at once.
                chunk = stream.read(chunk_size)
                while chunk:
                    gzip_writer.write(chunk)
                    chunk = stream.read(chunk_size)

            encryption_manager = EncryptionManager.get_default()
            encryption_manager.encrypt_source_file(
                file_in=stf,
                encrypted_file_path_out=Path(encrypted_file_path),
            )

        return encrypted_file_name
Example #8
0
    def _secure_file_stream(self, total_content_length, content_type,
                            filename=None, content_length=None):
        """Storage class for data streamed in from requests.

        If the data is relatively small (512KB), just store it in
        memory. Otherwise, use the SecureTemporaryFile class to buffer
        it on disk, encrypted with an ephemeral key to mitigate
        forensic recovery of the plaintext.

        """
        if total_content_length > 1024 * 512:
            # We don't use `config.TEMP_DIR` here because that
            # directory is exposed via X-Send-File and there is no
            # reason for these files to be publicly accessible. See
            # note in `config.py` for more info. Instead, we just use
            # `/tmp`, which has the additional benefit of being
            # automatically cleared on reboot.
            return SecureTemporaryFile('/tmp')  # nosec
        return BytesIO()
def test_SecureTemporaryFile_is_a_STREAMLIKE_TYPE():
    """`_is_stream` must report a SecureTemporaryFile instance as a stream."""
    tmp = SecureTemporaryFile('/tmp')
    assert _is_stream(tmp)
def test_read_write_unicode():
    """Non-ASCII text survives a write followed by a read and UTF-8 decode."""
    tmp = SecureTemporaryFile('/tmp')
    original_text = '鬼神 Kill Em All 1989'
    tmp.write(original_text)

    round_tripped = tmp.read().decode('utf-8')
    assert round_tripped == original_text
def test_write_then_read_twice():
    """The first read returns everything written; the second returns b''."""
    tmp = SecureTemporaryFile('/tmp')
    tmp.write(MESSAGE)

    first_read = tmp.read()
    assert first_read.decode('utf-8') == MESSAGE

    second_read = tmp.read()
    assert second_read == b''
def test_read_before_writing():
    """Reading before anything was written must fail with an AssertionError."""
    f = SecureTemporaryFile('/tmp')
    with pytest.raises(AssertionError) as err:
        f.read()
    # Check the raised exception itself: str() of the ExceptionInfo wrapper
    # is a repr whose format is owned by pytest, not by the code under test.
    assert 'You must write before reading!' in str(err.value)
Example #13
0
def test_read_write_unicode():
    """Text with non-ASCII characters reads back as its UTF-8 encoding."""
    tmp = SecureTemporaryFile("/tmp")
    text = "鬼神 Kill Em All 1989"
    tmp.write(text)

    raw = tmp.read()
    assert raw.decode("utf-8") == text
Example #14
0
def test_write_then_read_twice():
    """A second read after the content was consumed yields empty bytes."""
    tmp = SecureTemporaryFile("/tmp")
    tmp.write(MESSAGE)

    content = tmp.read()
    assert content.decode("utf-8") == MESSAGE

    leftover = tmp.read()
    assert leftover == b""