def load(filepath: Union[str, Path], out: Optional[Tensor] = None, normalization: Union[bool, float, Callable] = True, channels_first: bool = True, num_frames: int = 0, offset: int = 0, signalinfo: Optional[SignalInfo] = None, encodinginfo: Optional[EncodingInfo] = None, filetype: Optional[str] = None) -> Tuple[Tensor, int]: r"""Loads an audio file from disk into a tensor Args: filepath (str or Path): Path to audio file out (Tensor, optional): An output tensor to use instead of creating one. (Default: ``None``) normalization (bool, float, or callable, optional): If boolean `True`, then output is divided by `1 << 31` (assumes signed 32-bit audio), and normalizes to `[-1, 1]`. If `float`, then output is divided by that number If `Callable`, then the output is passed as a parameter to the given function, then the output is divided by the result. (Default: ``True``) channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``) num_frames (int, optional): Number of frames to load. 0 to load everything after the offset. (Default: ``0``) offset (int, optional): Number of frames from the start of the file to begin data loading. (Default: ``0``) signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the audio type cannot be automatically determined. (Default: ``None``) encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the audio type cannot be automatically determined. (Default: ``None``) filetype (str, optional): A filetype or extension to be set if sox cannot determine it automatically. (Default: ``None``) Returns: (Tensor, int): An output tensor of size `[C x L]` or `[L x C]` where L is the number of audio frames and C is the number of channels. An integer which is the sample rate of the audio (as listed in the metadata of the file) Example >>> data, sample_rate = torchaudio.load('foo.mp3') >>> print(data.size()) torch.Size([2, 278756]) >>> print(sample_rate) 44100 >>> data_vol_normalized, _ = torchaudio.load('foo.mp3', normalization=lambda x: torch.abs(x).max()) >>> print(data_vol_normalized.abs().max()) 1. """ return _get_audio_backend_module().load( filepath, out=out, normalization=normalization, channels_first=channels_first, num_frames=num_frames, offset=offset, signalinfo=signalinfo, encodinginfo=encodinginfo, filetype=filetype, )
def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]: r"""Gets metadata from an audio file without loading the signal. Args: filepath (str): Path to audio file Returns: (sox_signalinfo_t, sox_encodinginfo_t): A si (sox_signalinfo_t) signal info as a python object. An ei (sox_encodinginfo_t) encoding info Example >>> si, ei = torchaudio.info('foo.wav') >>> rate, channels, encoding = si.rate, si.channels, ei.encoding """ return _get_audio_backend_module().info(filepath)
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None: r"""Convenience function for `save_encinfo`. Args: filepath (str): Path to audio file src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels sample_rate (int): An integer which is the sample rate of the audio (as listed in the metadata of the file) precision (int, optional): Bit precision (Default: ``16``) channels_first (bool, optional): Set channels first or length first in result. ( Default: ``True``) """ return _get_audio_backend_module().save( filepath, src, sample_rate, precision=precision, channels_first=channels_first )