Ejemplo n.º 1
0
def update_hdf5(data, path, prefix="/"):
    """
    Update an HDF5 file (or an already opened h5py file) with ``data``.

    Thin wrapper around ``hdf5.update_hdf5``.

    Args:
        data: Object forwarded unchanged to ``hdf5.update_hdf5``.
        path: str, pathlib.Path or an open ``h5py.File``. An open file is
            forwarded as it is, everything else is normalized to a str
            first (file descriptors are rejected).
        prefix: Forwarded as ``path`` keyword to ``hdf5.update_hdf5``,
            i.e. the location inside the HDF5 file.
    """
    # ToDo: drop this wrapper
    assert isinstance(path, (str, Path, hdf5.h5py.File))
    if not isinstance(path, hdf5.h5py.File):
        # Normalize the given path (the original code passed the function
        # `normalize_path` itself as argument, which could never work).
        path = normalize_path(path, as_str=True, allow_fd=False)
    hdf5.update_hdf5(data, path, path=prefix)
Ejemplo n.º 2
0
def dump(
    obj,
    path,
    mkdir=False,
    mkdir_parents=False,
    mkdir_exist_ok=False,  # Should this be an option? Should the default be True?
    unsafe=False,  # Should this be an option? Should the default be True?
    # atomic=False,  ToDo: Add atomic support
    **kwargs,
):
    """
    A generic dump function to write the obj to path.

    Infer the dump protocol (e.g. json, pickle, ...) from the path name.

    Supported formats:
     - Text:
       - json
       - yaml
     - Binary:
       - pkl: pickle
       - dill
       - h5: HDF5
       - wav
       - mat: MATLAB
       - npy: Numpy
       - npz: Numpy compressed
       - pth: Pickle with Pytorch support
     - Compressed:
       - json.gz
       - pkl.gz
       - npy.gz

    Args:
        obj: Arbitrary object that is supported from the dump protocol.
        path: str or pathlib.Path
        mkdir:
            Whether to create the parent dir of path.
        mkdir_parents:
            Forwarded as `parents` to `path.parent.mkdir`.
        mkdir_exist_ok:
            Forwarded as `exist_ok` to `path.parent.mkdir`. When True, the
            dump is first tried without creating the directory and the
            directory is only created when that fails with
            FileNotFoundError.
        unsafe:
            Allow unsafe dump protocol. This option is more relevant for load.
            Required for pkl, dill, npz, pth and pkl.gz. Selects the unsafe
            yaml dumper and is forwarded as `allow_pickle` for npy/npy.gz.
        **kwargs:
            Forwarded arguments to the particular dump function.
            Should rarely be used, because when a special property of the dump
            function/protocol is used, use directly that dump function.
            Not allowed for the gz and npz formats.

    Returns:
        None

    Raises:
        ValueError: When the suffix of path is not supported.
        AssertionError: When the protocol requires `unsafe=True`, when
            kwargs are given for a format that does not accept them or
            when the audio data for wav has an unexpected shape.

    """
    path = normalize_path(path, allow_fd=False)
    if mkdir:
        if mkdir_exist_ok:
            # Assume that in most cases the dir exists.
            # -> try first to reduce io requests
            try:
                return dump(obj, path, unsafe=unsafe, **kwargs)
            except FileNotFoundError:
                pass
        path.parent.mkdir(parents=mkdir_parents, exist_ok=mkdir_exist_ok)

    if str(path).endswith(".json"):
        from paderbox.io import dump_json
        dump_json(obj, path, **kwargs)
    elif str(path).endswith(".pkl"):
        assert unsafe, (unsafe, path)
        with path.open("wb") as fp:
            pickle.dump(obj, fp, protocol=pickle.HIGHEST_PROTOCOL, **kwargs)
    elif str(path).endswith(".dill"):
        assert unsafe, (unsafe, path)
        with path.open("wb") as fp:
            import dill
            dill.dump(obj, fp, **kwargs)
    elif str(path).endswith(".h5"):
        from paderbox.io.hdf5 import dump_hdf5
        dump_hdf5(obj, path, **kwargs)
    elif str(path).endswith(".yaml"):
        if unsafe:
            from paderbox.io.yaml_module import dump_yaml_unsafe
            dump_yaml_unsafe(obj, path, **kwargs)
        else:
            from paderbox.io.yaml_module import dump_yaml
            dump_yaml(obj, path, **kwargs)
    elif str(path).endswith(".gz"):
        assert len(kwargs) == 0, kwargs
        # Validate everything that can be validated *before* the file is
        # opened: GzipFile(..., 'wb') creates/truncates the target, hence a
        # late failure would leave an empty file behind or destroy an
        # already existing file.
        json_payload = None
        if str(path).endswith(".json.gz"):
            json_payload = json.dumps(obj).encode()
        elif str(path).endswith(".pkl.gz"):
            assert unsafe, (unsafe, path)
        elif str(path).endswith(".npy.gz"):
            pass
        else:
            raise ValueError(path)

        with gzip.GzipFile(path, 'wb', compresslevel=1) as f:
            if json_payload is not None:
                f.write(json_payload)
            elif str(path).endswith(".pkl.gz"):
                pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                # Only ".npy.gz" is left, see the validation above.
                np.save(f, obj, allow_pickle=unsafe)
    elif str(path).endswith(".wav"):
        from paderbox.io import dump_audio
        if np.ndim(obj) == 1:
            pass
        elif np.ndim(obj) == 2:
            # NOTE(review): presumably a guard against a
            # (samples, channels) layout - confirm.
            assert np.shape(obj)[0] < 20, (np.shape(obj), obj)
        else:
            raise AssertionError(('Expect ndim in [1, 2]', np.shape(obj), obj))
        with path.open("wb") as fp:  # Throws better exception msg
            dump_audio(obj, fp, **kwargs)
    elif str(path).endswith('.mat'):
        import scipy.io as sio
        sio.savemat(path, obj, **kwargs)
    elif str(path).endswith('.npy'):
        np.save(str(path), obj, allow_pickle=unsafe, **kwargs)
    elif str(path).endswith('.npz'):
        assert unsafe, (unsafe, path)
        assert len(kwargs) == 0, kwargs
        if isinstance(obj, dict):
            np.savez(str(path), **obj)
        else:
            np.savez(str(path), obj)
    elif str(path).endswith('.pth'):
        assert unsafe, (unsafe, path)
        import torch
        torch.save(obj, str(path), **kwargs)
    else:
        raise ValueError('Unsupported suffix:', path)
Ejemplo n.º 3
0
def load_audio(
    path,
    *,
    frames=-1,
    start=0,
    stop=None,
    dtype=np.float64,
    fill_value=None,
    expected_sample_rate=None,
    unit='samples',
    return_sample_rate=False,
):
    """
    WIP will deprecate audioread in the future

    Difference to soundfile.read:
     - Default: Return only signal
     - With the argument "unit" the unit of frames, start and stop can be
       changed (a negative stop is not supported with unit='seconds').
     - With given expected_sample_rate the sample rate of the file is
       checked (recommended)
     - The returned signal is transposed, i.e. a multichannel signal has
       the shape (channels, samples).
     - With dtype=None the dtype is selected from the subtype of the file
       (PCM_16 -> int16, FLOAT -> float32, DOUBLE -> float64).
     - Files with the suffix '.m4a' are read with audioread. In this case
       partial loading is not supported (start must be 0 and stop None)
       and frames, dtype and fill_value are ignored: the buffers are
       interpreted as little endian int16 and scaled to float64.

    Additional parameters (compared to soundfile.read):

    expected_sample_rate : int, optional
        When given, a ValueError is raised if the sample rate of the file
        differs.
    unit : {'samples', 'seconds'}
        Unit of `frames`, `start` and `stop`.
    return_sample_rate : bool
        When True, return the tuple (signal, sample_rate) instead of only
        the signal.

    Raises ValueError for an unknown unit or an unexpected sample rate,
    NotImplementedError for a negative stop with unit='seconds' and
    RuntimeError (with the output of the `file` command in the message)
    when the file could not be read.

    soundfile.read doc text and some examples:

    Provide audio data from a sound file as NumPy array.

    By default, the whole file is read from the beginning, but the
    position to start reading can be specified with `start` and the
    number of frames to read can be specified with `frames`.
    Alternatively, a range can be specified with `start` and `stop`.

    If there is less data left in the file than requested, the rest of
    the frames are filled with `fill_value`.
    If no `fill_value` is specified, a smaller array is returned.

    Parameters
    ----------
    path : str or int or file-like object
        The file to read from.  See :class:`SoundFile` for details.
    frames : int, optional
        The number of frames to read. If `frames` is negative, the whole
        rest of the file is read.  Not allowed if `stop` is given.
    start : int, optional
        Where to start reading.  A negative value counts from the end.
    stop : int, optional
        The index after the last frame to be read.  A negative value
        counts from the end.  Not allowed if `frames` is given.
    dtype : {'float64', 'float32', 'int32', 'int16'}, optional
        Data type of the returned array, by default ``'float64'``.
        Floating point audio data is typically in the range from
        ``-1.0`` to ``1.0``.  Integer data is in the range from
        ``-2**15`` to ``2**15-1`` for ``'int16'`` and from ``-2**31`` to
        ``2**31-1`` for ``'int32'``.

        .. note:: Reading int values from a float file will *not*
            scale the data to [-1.0, 1.0). If the file contains
            ``np.array([42.6], dtype='float32')``, you will read
            ``np.array([43], dtype='int32')`` for ``dtype='int32'``.

    Returns
    -------
    audiodata : numpy.ndarray or type(out)
        A two-dimensional (frames x channels) NumPy array is returned.
        If the sound file has only one channel, a one-dimensional array
        is returned.  Use ``always_2d=True`` to return a two-dimensional
        array anyway.

        If `out` was specified, it is returned.  If `out` has more
        frames than available in the file (or if `frames` is smaller
        than the length of `out`) and no `fill_value` is given, then
        only a part of `out` is overwritten and a view containing all
        valid frames is returned.

    Other Parameters
    ----------------
    always_2d : bool, optional
        By default, reading a mono sound file will return a
        one-dimensional array.  With ``always_2d=True``, audio data is
        always returned as a two-dimensional array, even if the audio
        file has only one channel.
    fill_value : float, optional
        If more frames are requested than available in the file, the
        rest of the output is be filled with `fill_value`.  If
        `fill_value` is not specified, a smaller array is returned.
    out : numpy.ndarray or subclass, optional
        If `out` is specified, the data is written into the given array
        instead of creating a new array.  In this case, the arguments
        `dtype` and `always_2d` are silently ignored!  If `frames` is
        not given, it is obtained from the length of `out`.
    samplerate, channels, format, subtype, endian, closefd
        See :class:`SoundFile`.

    Examples
    --------
    >>> from paderbox.io import load_audio
    >>> from paderbox.testing.testfile_fetcher import get_file_path
    >>> path = get_file_path('speech.wav')
    >>> data = load_audio(path)
    >>> data.shape
    (49600,)

    Say you load audio examples from a very long audio, you can provide a
    start position and a duration in samples or seconds.

    >>> path = get_file_path('speech.wav')
    >>> signal = load_audio(path, start=0, frames=16_000)
    >>> signal.shape
    (16000,)
    >>> signal = load_audio(path, start=0, frames=1, unit='seconds')
    >>> signal.shape
    (16000,)

    If the audio file is to short, only return the defined part:

    >>> signal = load_audio(path, start=0, frames=160_000)
    >>> signal.shape
    (49600,)

    >>> path = get_file_path('123_1pcbe_shn.sph')
    >>> load_audio(path)  # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    RuntimeError: Wrong suffix .sph in .../123_1pcbe_shn.sph.
    File format:
    .../123_1pcbe_shn.sph: NIST SPHERE file
    <BLANKLINE>
    """

    # soundfile does not support pathlib.Path.
    # ToDo: Is this still True?
    path = normalize_path(path, as_str=True)

    if unit == 'samples':
        pass
    elif unit == 'seconds':
        if stop is not None:
            if stop < 0:
                raise NotImplementedError(unit, stop)
        # The sample rate is necessary to convert seconds to samples.
        with soundfile.SoundFile(path) as f:
            # total_samples = len(f)
            samplerate = f.samplerate
        start = int(np.round(start * samplerate))
        if frames > 0:
            # A non positive value means "read the rest" and has no unit.
            frames = int(np.round(frames * samplerate))
        if stop is not None and stop > 0:
            stop = int(np.round(stop * samplerate))
    else:
        raise ValueError(unit)

    try:
        if isinstance(path, (str, Path)) and (Path(path).suffix == '.m4a'):
            import audioread
            assert (start == 0 and stop is None), \
                'audioread does not support partial loading of audio files'
            with audioread.audio_open(path) as f:
                # Use the same name as the soundfile branch. Otherwise
                # `expected_sample_rate` and `return_sample_rate` fail with
                # a NameError for m4a files.
                sample_rate = f.samplerate
                data = []
                # Scale the int16 values to the range [-1, 1).
                scale = 1. / float(1 << (15))
                for buf in f:
                    data.append(
                        np.frombuffer(buf, "<i2").astype(np.float64) * scale)
                signal = np.concatenate(data)
        else:
            with soundfile.SoundFile(
                    path,
                    'r',
            ) as f:
                if dtype is None:
                    # Select the dtype that matches the encoding of the file.
                    from paderbox.utils.mapping import Dispatcher
                    mapping = Dispatcher({
                        'PCM_16': np.int16,
                        'FLOAT': np.float32,
                        'DOUBLE': np.float64,
                    })
                    dtype = mapping[f.subtype]

                sample_rate = f.samplerate
                frames = f._prepare_read(start=start, stop=stop, frames=frames)
                signal = f.read(frames=frames,
                                dtype=dtype,
                                fill_value=fill_value)
    except RuntimeError as e:
        if isinstance(path, (Path, str)):
            # Ask the `file` command for the file format to improve the
            # exception msg.
            from paderbox.utils.process_caller import run_process
            cp = run_process(['file', f'{path}'])
            stdout = cp.stdout
            if Path(path).suffix == '.wav':
                # Improve exception msg for NIST SPHERE files.
                raise RuntimeError(f'Could not read {path}.\n'
                                   f'File format:\n{stdout}') from e
            else:
                path = Path(path)
                raise RuntimeError(f'Wrong suffix {path.suffix} in {path}.\n'
                                   f'File format:\n{stdout}') from e
        raise

    if expected_sample_rate is not None:
        if expected_sample_rate != sample_rate:
            raise ValueError(
                f'Requested sampling rate is {expected_sample_rate} but the '
                f'audiofile has {sample_rate}')

    # When signal is multichannel, than soundfile return (samples, channels)
    # At NT it is more common to have the shape (channels, samples)
    # => transpose
    signal = signal.T

    if return_sample_rate:
        return signal, sample_rate
    else:
        return signal
Ejemplo n.º 4
0
def dump_audio(
    obj,
    path,
    *,
    sample_rate=16000,
    dtype=np.int16,
    start=None,
    normalize=True,
    format=None,
):
    """
    Write the audio signal obj to path.

    If normalize is False and the dtype is float, the values of obj should be
    in the range [-1, 1).

    Params:
        obj: Shape (channels, samples) or (samples,)
        path:
        sample_rate:
        dtype:
            The dtype of the written file. Default is integer with 16 bit.
            With None, the dtype of obj is used.
        start:
            Offset to write in an existing file. Can be used for block
            processing algorithms that use overlap save.
            When the file does not exist, a new file is created.
        normalize:
            bool, if the audio stream should be normalized to be in the range
            -1 to 1. Only supported for dtype == np.int16.
            A signal that contains only zeros is written as silence.
        format:
            Special option. See soundfile.SoundFile.__init__ for details.

    Raises:
        TypeError: When normalize is requested for a signal that is neither
            float nor int or when dtype is not supported.
        AssertionError: When normalize is requested for dtype != np.int16.

    >>> from paderbox.utils.process_caller import run_process
    >>> from paderbox.io import load_audio
    >>> from paderbox.io.cache_dir import get_cache_dir
    >>> a = np.array([1, 2, -4, 4], dtype=np.int16)
    >>> import io, os
    >>> # file = io.BytesIO()
    >>> file = get_cache_dir() / 'tmp_audio_data.wav'
    >>> dump_audio(a, file, normalize=False)
    >>> load_audio(file) * 2**15
    array([ 1.,  2., -4.,  4.])
    >>> print('stdout:', run_process(f'file {file}').stdout)  # doctest: +ELLIPSIS
    stdout: .../tmp_audio_data.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 16000 Hz
    <BLANKLINE>
    >>> dump_audio(a, file, normalize=True)
    >>> load_audio(file)
    array([ 0.24996948,  0.49996948, -0.99996948,  0.99996948])
    >>> print('stdout:', run_process(f'file {file}').stdout)  # doctest: +ELLIPSIS
    stdout: .../tmp_audio_data.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 16000 Hz
    <BLANKLINE>

    >>> data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) / 32
    >>> data
    array([0.     , 0.03125, 0.0625 , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.28125])
    >>> dump_audio(data, file, normalize=False)
    >>> load_audio(file)
    array([0.     , 0.03125, 0.0625 , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.28125])
    >>> print('stdout:', run_process(f'file {file}').stdout)  # doctest: +ELLIPSIS
    stdout: .../tmp_audio_data.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 16000 Hz
    <BLANKLINE>
    >>> dump_audio(np.array([16, 24]) / 32, file, normalize=False, start=1)
    >>> load_audio(file)
    array([0.     , 0.5    , 0.75   , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.28125])
    >>> print('stdout:', run_process(f'file {file}').stdout)  # doctest: +ELLIPSIS
    stdout: ...tmp_audio_data.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 16000 Hz
    <BLANKLINE>
    >>> dump_audio(np.array([16, 24, 24, 24]) / 32, file, normalize=False, start=9)
    >>> load_audio(file)
    array([0.     , 0.5    , 0.75   , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.5    , 0.75   , 0.75   , 0.75   ])
    >>> load_audio(file).shape
    (13,)
    >>> dump_audio(np.array([16, 24, 24, 24]) / 32, file, normalize=False, start=20)
    >>> load_audio(file)
    array([0.     , 0.5    , 0.75   , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.5    , 0.75   , 0.75   , 0.75   , 0.     ,
           0.     , 0.     , 0.     , 0.     , 0.     , 0.     , 0.5    ,
           0.75   , 0.75   , 0.75   ])
    >>> load_audio(file).shape
    (24,)
    >>> print('stdout:', run_process(f'file {file}').stdout)  # doctest: +ELLIPSIS
    stdout: .../tmp_audio_data.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 16000 Hz
    <BLANKLINE>
    >>> os.remove(file)
    >>> dump_audio(np.array([16, 24, 24, 24]) / 32, file, normalize=False, start=20)
    >>> load_audio(file)
    array([0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
           0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.5 , 0.75,
           0.75, 0.75])
    >>> load_audio(file).shape
    (24,)
    >>> print('stdout:', run_process(f'file {file}').stdout)  # doctest: +ELLIPSIS
    stdout: .../tmp_audio_data.wav: RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 16000 Hz
    <BLANKLINE>

    >>> data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) / 32
    >>> data
    array([0.     , 0.03125, 0.0625 , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.28125])
    >>> dump_audio(data, file, normalize=False, dtype=None)
    >>> load_audio(file)
    array([0.     , 0.03125, 0.0625 , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.28125])
    >>> print(run_process(f'soxi {file}').stdout)  # doctest: +ELLIPSIS
    <BLANKLINE>
    Input File     : '.../tmp_audio_data.wav'
    Channels       : 1
    Sample Rate    : 16000
    Precision      : 54-bit
    Duration       : 00:00:00.00 = 10 samples ~ 0.046875 CDDA sectors
    File Size      : 160
    Bit Rate       : 2.05M
    Sample Encoding: 64-bit Floating Point PCM
    <BLANKLINE>
    <BLANKLINE>
    >>> dump_audio(data.astype(np.float32), file, normalize=False, dtype=None)
    >>> load_audio(file, dtype=None)
    array([0.     , 0.03125, 0.0625 , 0.09375, 0.125  , 0.15625, 0.1875 ,
           0.21875, 0.25   , 0.28125], dtype=float32)
    >>> print(run_process(f'soxi {file}').stdout)  # doctest: +ELLIPSIS
    <BLANKLINE>
    Input File     : '.../tmp_audio_data.wav'
    Channels       : 1
    Sample Rate    : 16000
    Precision      : 25-bit
    Duration       : 00:00:00.00 = 10 samples ~ 0.046875 CDDA sectors
    File Size      : 120
    Bit Rate       : 1.54M
    Sample Encoding: 32-bit Floating Point PCM
    <BLANKLINE>
    <BLANKLINE>

    """
    path = normalize_path(path, as_str=True)
    obj = np.asarray(obj)

    if normalize:
        if obj.dtype.kind not in ['f', 'i']:
            raise TypeError(
                'Only float and int is currently supported with normalize. '
                f'Got dtype {obj.dtype}')
        # Normalization can change the type (e.g. int to float).
        # When saving as float, normalize is a bad idea.
        # The normalization is adjusted for int16
        assert dtype == np.int16, (
            'Currently is only normalize allowed for dtype == np.int16 '
            f'and not for dtype == {dtype}')
        # Correction, because the allowed values are in the range [-1, 1).
        # => "1" is not a valid value
        correction = (2**15 - 1) / (2**15)
        peak = np.amax(np.abs(obj))
        if peak == 0:
            # Silence can not be scaled. Without this guard the division by
            # zero would turn the complete signal into NaN.
            scale = 1.
        else:
            scale = correction / peak
        obj = obj * scale

    # ToDo: better exception when path is file descriptor
    if start is None or not Path(path).exists():
        # Create a new file (or overwrite an existing one).
        if obj.ndim == 1:
            channels = 1
        else:
            channels = obj.shape[0]

        sf_args = dict(
            mode='w',
            channels=channels,
            samplerate=sample_rate,
        )
    else:
        # Write into the existing file, beginning at the offset start.
        sf_args = dict(mode='r+')
    sf_args['format'] = format

    dtype_map = Dispatcher({
        np.int16: 'PCM_16',
        np.dtype('int16'): 'PCM_16',
        np.int32: 'PCM_32',
        np.dtype('int32'): 'PCM_32',
        np.float32: 'FLOAT',
        np.dtype('float32'): 'FLOAT',
        np.float64: 'DOUBLE',
        np.dtype('float64'): 'DOUBLE',
    })

    if dtype in [np.int16]:
        # No subtype is passed, soundfile selects the subtype.
        pass
    elif dtype in [np.float32, np.float64, np.int32]:
        sf_args['subtype'] = dtype_map[dtype]
    elif dtype is None:
        sf_args['subtype'] = dtype_map[obj.dtype]
    else:
        raise TypeError(dtype)

    with soundfile.SoundFile(path, **sf_args) as f:
        if start is not None:
            f.seek(start)
        # soundfile expects the shape (samples, channels)
        f.write(obj.T)
Ejemplo n.º 5
0
def dump_hdf5(data, path):
    """
    Write ``data`` to an HDF5 file.

    Thin wrapper: ``path`` is normalized to a str (file descriptors are
    accepted) and then handed, together with ``data``, to
    ``hdf5.dump_hdf5``, whose return value is passed through.
    """
    # ToDo: drop this wrapper
    return hdf5.dump_hdf5(
        data,
        normalize_path(path, as_str=True, allow_fd=True),
    )
Ejemplo n.º 6
0
def load_hdf5(path, internal_path="/"):
    """
    Load the content of an HDF5 file.

    Thin wrapper around ``hdf5.load_hdf5``.

    Args:
        path: Location of the file. Normalized to a str, file descriptors
            are rejected.
        internal_path: Converted to str and forwarded as second argument
            to ``hdf5.load_hdf5``.

    Returns:
        Whatever ``hdf5.load_hdf5`` returns.
    """
    # ToDo: drop this wrapper
    file_name = normalize_path(path, as_str=True, allow_fd=False)
    location = str(internal_path)
    return hdf5.load_hdf5(file_name, location)
Ejemplo n.º 7
0
def dump_pickle(data, path):
    """
    Pickle ``data`` into the file ``path``.

    The highest available pickle protocol is used. ``path`` is normalized
    first, file descriptors are rejected.
    """
    target = normalize_path(path, allow_fd=False)
    with target.open("wb") as stream:
        pickle.dump(data, stream, protocol=pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 8
0
def load_pickle(path):
    """
    Return the object that is stored in the pickle file ``path``.

    ``path`` is normalized first, file descriptors are rejected.

    Note:
        Unpickling can execute arbitrary code. Only use this function for
        files from a trusted source.
    """
    source = normalize_path(path, allow_fd=False)
    with source.open("rb") as stream:
        content = pickle.load(stream)
    return content
Ejemplo n.º 9
0
def open_atomic(file, mode, *args, force=False, **kwargs):
    """
    Produce a tempfile next to the desired file (same filesystem).
    Overwrite the file when the context is left without an exception
    (when force is True, the file is always overwritten).

    This function is inspired from:
    # https://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python

    For replace alternative see
    # http://code.activestate.com/recipes/579097-safely-and-atomically-write-to-a-file/


    Note:
        This function allows to overwrite a file, while another process still
        reads the file. The other process keeps a file pointer to the original
        file. The original file is removed from the filesystem when nobody
        reads the file. This is a nice property known from Makefiles.

    Note:
        A file that is written with this function can only be read, when it is
        fully written (i.e. the context is left).

    Examples:

    Procure a file with some content
    >>> from paderbox.io.cache_dir import get_cache_dir
    >>> file = get_cache_dir() / 'tmp.io.txt'
    >>> with open(file, 'w') as f:
    ...     f.write('test\\nbla')
    8
    >>> with open(file, 'r') as f:
    ...     print(f.read())
    test
    bla

    Read the file and write to the file the same content
    (with open_atomic no problem)
    >>> with open(file, 'r') as src:
    ...     with open_atomic(file, 'w') as f:
    ...         for line in src:
    ...             f.write(line)
    ...     src.seek(0)
    ...     with open_atomic(file, 'w') as f:
    ...         for line in src:
    ...             f.write(line + 'second write\\n')
    5
    3
    0
    18
    16
    >>> with open(file, 'r') as f:
    ...     print(f.read())
    test
    second write
    blasecond write
    <BLANKLINE>

    Read the file and write to the file the same content
    (with open this does not work)
    >>> with open(file, 'r') as src:
    ...     with open(file, 'w') as dst:
    ...         for line in src:
    ...             f.write(line)
    >>> with open(file, 'r') as f:
    ...     print(f.read())
    <BLANKLINE>
    >>> with open_atomic(file, 'w') as f:
    ...     f.write('test\\nbla')
    ...     f.write('test\\nbla')
    8
    8
    >>> with open(file, 'r') as f:
    ...     print(f.read())
    test
    blatest
    bla

    When an exception occurs do not write anything (except force is True)
    >>> with open_atomic(file, 'w') as f:
    ...     f.write('sdkfg\\nbla')
    ...     raise Exception
    Traceback (most recent call last):
    ...
    Exception
    >>> with open(file, 'r') as f:
    ...     print(f.read())
    test
    blatest
    bla
    >>> with open_atomic(file, 'w', force=True) as f:
    ...     f.write('sdkfg\\nbla')
    ...     raise Exception
    Traceback (most recent call last):
    ...
    Exception
    >>> with open(file, 'r') as f:
    ...     print(f.read())
    sdkfg
    bla

    >>> with open_atomic(file, 'w') as f:
    ...     print('Name:', f.name)  # doctest: +ELLIPSIS
    Name: .../tmp.io.txt...
    """
    file = normalize_path(file, as_str=True, allow_fd=False)

    assert 'w' in mode, mode

    def commit(handle):
        # Make sure that the content is on the disk before the tempfile
        # takes the place of the destination.
        handle.flush()
        os.fsync(handle.fileno())
        # os.rename(handle.name, file)  # fails if dst exists
        os.replace(handle.name, file)

        # Disable NamedTemporaryFile.close(), because the file was renamed.
        handle.delete = False
        handle._closer.delete = False

    with tempfile.NamedTemporaryFile(
            mode, *args, **kwargs, prefix=file, dir=os.getcwd()
    ) as tmp_f:
        try:
            yield tmp_f
        except BaseException:
            # The body of the context failed (or the generator was closed):
            # keep the old file, except the user forces the replacement.
            if force:
                commit(tmp_f)
            raise
        else:
            commit(tmp_f)