Example #1
0
    def sox_build_flow_effects(self,
                               out: Optional[Tensor] = None
                               ) -> Tuple[Tensor, int]:
        r"""Run the configured effects chain on the input file and collect the audio in a tensor.

        Args:
            out (Tensor, optional): Tensor the processed audio is written into. When omitted,
                a new empty ``torch.FloatTensor`` is created. (Default: ``None``)

        Returns:
            Tuple[Tensor, int]: The output Tensor of size `[C x L]` or `[L x C]`, where L is the
            number of audio frames and C is the number of channels, followed by an integer
            sample rate of the audio (as listed in the metadata of the file).
        """
        # Either validate the caller-supplied buffer or allocate a fresh one.
        if out is None:
            out = torch.FloatTensor()
        else:
            _misc_ops.check_input(out)

        # An empty chain gets a single placeholder "no_effects" entry before
        # being handed to the extension.
        if len(self.chain) == 0:
            placeholder = SoxEffect()
            placeholder.ename = "no_effects"
            placeholder.eopts = [""]
            self.chain.append(placeholder)

        sample_rate = _torchaudio.build_flow_effects(
            self.input_file,
            out,
            self.channels_first,
            self.out_siginfo,
            self.out_encinfo,
            self.filetype,
            self.chain,
            self.MAX_EFFECT_OPTS,
        )

        # normalize_audio is called for its effect on `out`; its return value is unused.
        _misc_ops.normalize_audio(out, self.normalization)

        return out, sample_rate
Example #2
0
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
    r"""Write an audio tensor to ``filepath`` via ``soundfile.write``. See torchaudio.save

    Args:
        filepath (str): Destination path; its parent directory must already exist.
        src (Tensor): 1D (treated as mono) or 2D audio tensor.
        sample_rate (int): Sample rate forwarded to ``soundfile.write``.
        precision (int, optional): Bit precision; ``8`` maps to ``"PCM_S8"``, any other value
            to ``"PCM_<precision>"``. (Default: ``16``)
        channels_first (bool, optional): Whether ``src`` is laid out channels-first; if so it is
            transposed before writing. (Default: ``True``)

    Raises:
        OSError: If the directory that should contain ``filepath`` does not exist.
        ValueError: If ``src`` has more than two dimensions or more than 16 channels.
    """
    ch_idx = 0 if channels_first else 1

    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))
    # check that src is a CPU tensor
    _misc_ops.check_input(src)
    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals. Use the out-of-place
        # unsqueeze so the caller's tensor keeps its original shape.
        src = src.unsqueeze(ch_idx)
    elif src.dim() > 2 or src.size(ch_idx) > 16:
        # NOTE(review): the check accepts exactly 16 channels although the
        # message says C < 16 -- confirm which limit is intended.
        raise ValueError(
            "Expected format where C < 16, but found {}".format(src.size()))

    # Transpose channels-first input before handing it to soundfile.
    if channels_first:
        src = src.t()

    if src.dtype == torch.int64:
        # Soundfile doesn't support int64
        src = src.type(torch.int32)

    # Passed as the fourth positional argument of soundfile.write.
    precision = "PCM_S8" if precision == 8 else "PCM_" + str(precision)

    return soundfile.write(filepath, src, sample_rate, precision)
Example #3
0
def load(filepath: str,
         out: Optional[Tensor] = None,
         normalization: bool = True,
         channels_first: bool = True,
         num_frames: int = 0,
         offset: int = 0,
         signalinfo: Optional[SignalInfo] = None,
         encodinginfo: Optional[EncodingInfo] = None,
         filetype: Optional[str] = None) -> Tuple[Tensor, int]:
    r"""Read an audio file into a tensor. See torchaudio.load

    Args:
        filepath (str): Path of the file to read; ``pathlib.Path`` values are stringified.
        out (Tensor, optional): Tensor to fill with the audio. A new empty
            ``torch.FloatTensor`` is created when omitted. (Default: ``None``)
        normalization (bool, optional): Forwarded to ``_misc_ops.normalize_audio``. (Default: ``True``)
        channels_first (bool, optional): Forwarded to the reader. (Default: ``True``)
        num_frames (int, optional): Number of frames to read; values below ``-1`` are
            rejected. (Default: ``0``)
        offset (int, optional): Starting offset; negative values are rejected. (Default: ``0``)
        signalinfo (SignalInfo, optional): Forwarded to the reader. (Default: ``None``)
        encodinginfo (EncodingInfo, optional): Forwarded to the reader. (Default: ``None``)
        filetype (str, optional): Forwarded to the reader. (Default: ``None``)

    Returns:
        Tuple[Tensor, int]: The filled tensor and the value returned by
        ``_torchaudio.read_audio_file`` (the sample rate).

    Raises:
        OSError: If ``filepath`` is not an existing regular file.
        ValueError: If ``num_frames`` is below ``-1`` or ``offset`` is negative.
    """

    # stringify if `pathlib.Path` (noop if already `str`)
    filepath = str(filepath)
    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # initialize output tensor
    if out is not None:
        _misc_ops.check_input(out)
    else:
        out = torch.FloatTensor()

    # The messages name the actual parameter and the actual accepted range.
    if num_frames < -1:
        raise ValueError("Expected value for num_frames -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected non-negative offset value")

    sample_rate = _torchaudio.read_audio_file(
        filepath,
        out,
        channels_first,
        num_frames,
        offset,
        signalinfo,
        encodinginfo,
        filetype
    )

    # normalize if needed
    _misc_ops.normalize_audio(out, normalization)

    return out, sample_rate
Example #4
0
def save_encinfo(filepath: str,
                 src: Tensor,
                 channels_first: bool = True,
                 signalinfo: Optional[SignalInfo] = None,
                 encodinginfo: Optional[EncodingInfo] = None,
                 filetype: Optional[str] = None) -> None:
    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.

    Args:
        filepath (str): Path to audio file
        src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
            the number of audio frames, C is the number of channels. A 1D tensor is
            treated as a mono signal.
        channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``)
        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
            audio type cannot be automatically determined (Default: ``None``). Its ``rate`` and
            ``precision`` attributes are coerced in place to ``float`` and ``int`` respectively.
        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
            audio type cannot be automatically determined (Default: ``None``).
        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
            automatically. Only used when ``filepath`` has no extension; otherwise the
            extension takes precedence. (Default: ``None``)

    Raises:
        OSError: If the directory that should contain ``filepath`` does not exist.
        ValueError: If ``src`` has more than two dimensions or more than 16 channels.
        TypeError: If ``signalinfo.rate`` or ``signalinfo.precision`` is set to a value that
            does not round-trip through ``float`` / ``int``.

    Example
        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> torchaudio.save('foo.wav', data, sample_rate)

    """
    ch_idx = 0 if channels_first else 1

    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))
    # check that src is a CPU tensor
    _misc_ops.check_input(src)
    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals. Use the out-of-place
        # unsqueeze so the caller's tensor keeps its original shape.
        src = src.unsqueeze(ch_idx)
    elif src.dim() > 2 or src.size(ch_idx) > 16:
        # NOTE(review): the check accepts exactly 16 channels although the
        # message says C < 16 -- confirm which limit is intended.
        raise ValueError("Expected format where C < 16, but found {}".format(
            src.size()))
    # sox stores the sample rate as a float, though practically sample rates are almost always integers
    # convert integers to floats
    if signalinfo:
        if signalinfo.rate and not isinstance(signalinfo.rate, float):
            if float(signalinfo.rate) == signalinfo.rate:
                signalinfo.rate = float(signalinfo.rate)
            else:
                raise TypeError('Sample rate should be a float or int')
        # check if the bit precision (i.e. bits per sample) is an integer
        if signalinfo.precision and not isinstance(signalinfo.precision, int):
            if int(signalinfo.precision) == signalinfo.precision:
                signalinfo.precision = int(signalinfo.precision)
            else:
                raise TypeError('Bit precision should be an integer')
    # programs such as librosa normalize the signal, unnormalize if detected:
    # values entirely within [-1, 1] are scaled by 2**31 and converted to int64
    if src.min() >= -1.0 and src.max() <= 1.0:
        src = src * (1 << 31)
        src = src.long()
    # set filetype and allow for files with no extensions
    extension = os.path.splitext(filepath)[1]
    filetype = extension[1:] if len(extension) > 0 else filetype
    # transpose from C x L -> L x C
    if channels_first:
        src = src.transpose(1, 0)
    # save data to file
    src = src.contiguous()
    _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo,
                                 filetype)