def sampling_rate(file: str) -> int:
    """Return the sampling rate of an audio file.

    Files whose extension is listed in ``SNDFORMATS``
    are inspected with :func:`soundfile.info`.
    For all other files sox is asked first,
    and ``mediainfo`` is used if sox fails.

    Args:
        file: file name of input audio file

    Returns:
        sampling rate of audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)

    if file_extension(file) in SNDFORMATS:
        return soundfile.info(file).samplerate

    try:
        return int(sox.file_info.sample_rate(file))
    except sox.core.SoxiError:
        # sox could not provide the information, ask mediainfo instead
        reported = run(
            f'mediainfo --Inform="Audio;%SamplingRate%" "{file}"'
        )
        if not reported:
            # mediainfo returned nothing usable either
            raise RuntimeError(broken_file_error(file))
        return int(reported)
def channels(file: str) -> int:
    """Return the number of channels of an audio file.

    Files whose extension is listed in ``SNDFORMATS``
    are inspected with :func:`soundfile.info`.
    For all other files sox is asked first,
    and ``mediainfo`` is used if sox fails.

    Args:
        file: file name of input audio file

    Returns:
        number of channels in audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)

    if file_extension(file) in SNDFORMATS:
        return soundfile.info(file).channels

    try:
        return int(sox.file_info.channels(file))
    except sox.core.SoxiError:
        # For MP4 stored and returned number of channels can be different,
        # hence the original channel count is requested first
        # and the plain channel count only as a second attempt
        queries = (
            f'mediainfo --Inform="Audio;%Channel(s)_Original%" "{file}"',
            f'mediainfo --Inform="Audio;%Channel(s)%" "{file}"',
        )
        for query in queries:
            try:
                return int(run(query))
            except ValueError:
                # Output could not be parsed as integer, try next query
                continue
        raise RuntimeError(broken_file_error(file))
def samples(file: str) -> int:
    """Number of samples in audio file.

    Files whose extension is listed in ``SNDFORMATS``
    are inspected directly.
    All other files are first converted
    to a temporary WAV file,
    and the samples of that file are counted.

    Args:
        file: file name of input audio file

    Returns:
        number of samples in audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    def samples_as_int(file):
        # Use the frame count as reported by soundfile
        # instead of reconstructing it from ``duration * samplerate``.
        # The float round trip can end up slightly below the true value
        # and ``int()`` would then truncate it to one sample less.
        # It also avoids parsing the file header twice.
        return int(soundfile.info(file).frames)

    file = audeer.safe_path(file)

    if file_extension(file) in SNDFORMATS:
        return samples_as_int(file)

    # Always convert to WAV for non SNDFORMATS
    with tempfile.TemporaryDirectory(prefix='audiofile') as tmpdir:
        tmpfile = os.path.join(tmpdir, 'tmp.wav')
        convert_to_wav(file, tmpfile)
        return samples_as_int(tmpfile)
def bit_depth(file: str) -> typing.Optional[int]:
    r"""Return the bit depth of an audio file.

    A bit depth is only reported for WAV and FLAC files,
    where it is looked up from the subtype
    returned by :func:`soundfile.info`.
    For all other files,
    e.g. lossy audio files with a varying bit depth,
    ``None`` is returned.

    Args:
        file: file name of input audio file

    Returns:
        bit depth of audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)
    file_type = file_extension(file)

    if file_type == 'wav':
        subtype_to_depth = {
            'PCM_16': 16,
            'PCM_24': 24,
            'PCM_32': 32,
            'PCM_U8': 8,
            'FLOAT': 32,
            'DOUBLE': 64,
            'ULAW': 8,
            'ALAW': 8,
            'IMA_ADPCM': 4,
            'MS_ADPCM': 4,
            'GSM610': 16,  # not sure if this could be variable?
            'G721_32': 4,  # not sure if correct
        }
    elif file_type == 'flac':
        subtype_to_depth = {
            'PCM_16': 16,
            'PCM_24': 24,
            'PCM_32': 32,
            'PCM_S8': 8,
        }
    else:
        # No fixed bit depth is reported for any other file type
        return None

    return subtype_to_depth[soundfile.info(file).subtype]
def duration(file: str, sloppy=False) -> float:
    """Return the duration of an audio file in seconds.

    With ``sloppy=False`` (the default)
    the duration in seconds is guaranteed
    to match the duration in samples.
    For files that need decoding, e.g. MP3 files,
    this means they are converted to WAV first.
    Results might differ between machines
    if different decoders are installed.

    With ``sloppy=True`` the duration stored in the header
    of the audio file is returned.
    This is faster,
    but the result still depends on the installed software
    and might hence differ between machines.
    If the header provides no duration information
    the behavior of ``sloppy=False`` is used instead.

    Args:
        file: file name of input audio file
        sloppy: if ``True`` report duration as stored in the header

    Returns:
        duration in seconds of audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)

    if file_extension(file) in SNDFORMATS:
        return soundfile.info(file).duration

    if sloppy:
        try:
            seconds = sox.file_info.duration(file)
        except sox.core.SoxiError:
            cmd = f'mediainfo --Inform="Audio;%Duration%" "{file}"'
            milliseconds = run(cmd)
            if milliseconds:
                # Convert to seconds, as mediainfo returns milliseconds
                return float(milliseconds) / 1000
        else:
            # sox reports ``None`` if it has no duration for the file
            return 0.0 if seconds is None else seconds

    # Not sloppy, or no header information available:
    # derive the duration from the decoded samples
    return samples(file) / sampling_rate(file)
def read(
        file: str,
        duration: float = None,
        offset: float = 0,
        always_2d: bool = False,
        dtype: str = 'float32',
        **kwargs,
) -> typing.Tuple[np.array, int]:
    """Read the signal and sample rate of an audio file.

    Files whose extension is listed in ``SNDFORMATS``
    are read directly with :func:`soundfile.read`.
    All other audio files are converted
    to a temporary WAV file first,
    which is then read with :func:`soundfile.read`.

    Args:
        file: file name of input audio file
        duration: return only a specified duration in seconds
        offset: start reading at offset in seconds
        always_2d: if ``True`` it always returns a two-dimensional signal
            even for mono sound files
        dtype: data type of returned signal,
            select from
            ``'float64'``,
            ``'float32'``,
            ``'int32'``,
            ``'int16'``
        kwargs: pass on further arguments to :func:`soundfile.read`

    Returns:
        * a two-dimensional array in the form
          ``[channels, samples]``.
          If the sound file has only one channel
          and ``always_2d=False``,
          a one-dimensional array is returned
        * sample rate of the audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)

    if file_extension(file) in SNDFORMATS:
        # soundfile expects start and stop positions in samples
        start = offset
        stop = duration
        if duration is not None or offset > 0:
            rate = sampling_rate(file)
            if offset > 0:
                start = np.ceil(offset * rate)  # samples
            if duration is not None:
                stop = int(np.ceil(duration * rate) + start)  # samples
        signal, sample_rate = soundfile.read(
            file,
            start=int(start),
            stop=stop,
            dtype=dtype,
            always_2d=always_2d,
            **kwargs,
        )
    else:
        # File formats not recognized by soundfile
        # are converted to WAV first.
        #
        # NOTE: this is faster than loading them with librosa directly.
        # In addition, librosa seems to have an issue
        # with the precision of the returned magnitude
        # (https://github.com/librosa/librosa/issues/811).
        #
        # MP3 files might be supported by soundfile in the future as well.
        # For a discussion on MP3 support in the underlying libsndfile
        # see https://github.com/erikd/libsndfile/issues/258.
        with tempfile.TemporaryDirectory(prefix='audiofile') as tmpdir:
            tmpfile = os.path.join(tmpdir, 'tmp.wav')
            # Offset and duration are handed over to the conversion
            convert_to_wav(file, tmpfile, offset, duration)
            signal, sample_rate = soundfile.read(
                tmpfile,
                dtype=dtype,
                always_2d=always_2d,
                **kwargs,
            )

    # [samples, channels] => [channels, samples]
    return signal.T, sample_rate
def write(
        file: str,
        signal: np.array,
        sampling_rate: int,
        bit_depth: int = 16,
        normalize: bool = False,
        **kwargs,
):
    """Write (normalized) audio files.

    Save audio data provided as an array of shape ``[channels, samples]``
    to a WAV, FLAC, or OGG file.
    ``channels`` can be up to 65535 for WAV,
    255 for OGG,
    and 8 for FLAC.
    For monaural audio the array can be one-dimensional.

    It uses :func:`soundfile.write` to write the audio files.

    Args:
        file: file name of output audio file.
            The format (WAV, FLAC, OGG) will be inferred from the file name
        signal: audio data to write
        sampling_rate: sample rate of the audio data
        bit_depth: bit depth of written file in bit,
            can be 8, 16, 24 for WAV and FLAC files,
            and in addition 32 for WAV files
        normalize: normalize audio data before writing,
            i.e. divide it by its maximum absolute value.
            A signal without a non-zero peak
            (empty or all zeros)
            is written unchanged
        kwargs: pass on further arguments to :func:`soundfile.write`

    Raises:
        RuntimeError: for non-supported bit depth or number of channels

    """
    file = audeer.safe_path(file)
    file_type = file_extension(file)

    # Check for allowed precisions
    if file_type == 'wav':
        depth_mapping = {
            8: 'PCM_U8',
            16: 'PCM_16',
            24: 'PCM_24',
            32: 'PCM_32',
        }
    elif file_type == 'flac':
        depth_mapping = {
            8: 'PCM_S8',
            16: 'PCM_16',
            24: 'PCM_24',
        }
    if file_type in ['wav', 'flac']:
        bit_depths = sorted(depth_mapping)
        if bit_depth not in bit_depths:
            raise RuntimeError(
                f'"bit_depth" has to be one of '
                f'{", ".join([str(b) for b in bit_depths])}.'
            )
        subtype = depth_mapping[bit_depth]
    else:
        # Let soundfile select the subtype for all other formats
        subtype = None

    # Check if number of channels is allowed for chosen file type
    num_channels = np.shape(signal)[0] if signal.ndim > 1 else 1
    if num_channels > MAX_CHANNELS[file_type]:
        if file_type != 'wav':
            hint = " Consider using 'wav' instead."
        else:
            hint = ''
        raise RuntimeError(
            "The maximum number of allowed channels "
            f"for '{file_type}' is {MAX_CHANNELS[file_type]}.{hint}"
        )

    if normalize:
        # Dividing by a peak of 0 would fill the signal with NaN,
        # and ``np.max`` raises on an empty array,
        # so only scale if there is a non-zero peak
        peak = np.max(np.abs(signal)) if np.size(signal) > 0 else 0
        if peak > 0:
            signal = signal / peak

    # soundfile expects [samples, channels]
    soundfile.write(file, signal.T, sampling_rate, subtype=subtype, **kwargs)