def sox_build_flow_effects(self,
                           out: Optional[Tensor] = None
                           ) -> Tuple[Tensor, int]:
    r"""Build effects chain and flow effects from input file to output tensor

    Args:
        out (Tensor, optional): Where the output will be written to.
            A fresh ``torch.FloatTensor`` is allocated when omitted.
            (Default: ``None``)

    Returns:
        Tuple[Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]`
        where L is the number of audio frames and C is the number of
        channels. An integer which is the sample rate of the audio (as
        listed in the metadata of the file)
    """
    # Allocate the destination tensor, or validate the one supplied.
    if out is None:
        out = torch.FloatTensor()
    else:
        _misc_ops.check_input(out)

    # An empty chain gets a single "no_effects" placeholder entry before
    # being handed to the extension.
    if len(self.chain) == 0:
        placeholder = SoxEffect()
        placeholder.ename = "no_effects"
        placeholder.eopts = [""]
        self.chain.append(placeholder)

    sample_rate = _torchaudio.build_flow_effects(
        self.input_file,
        out,
        self.channels_first,
        self.out_siginfo,
        self.out_encinfo,
        self.filetype,
        self.chain,
        self.MAX_EFFECT_OPTS,
    )

    # `out` is filled by the call above; normalize it before returning.
    _misc_ops.normalize_audio(out, self.normalization)

    return out, sample_rate
def save(filepath: str,
         src: Tensor,
         sample_rate: int,
         precision: int = 16,
         channels_first: bool = True) -> None:
    r"""See torchaudio.save

    Writes ``src`` to ``filepath`` through ``soundfile.write``.

    Args:
        filepath (str): Path of the audio file to write. Its parent
            directory must already exist.
        src (Tensor): 1D signal (treated as mono) or 2D signal laid out as
            `[C x L]` when ``channels_first`` is true, `[L x C]` otherwise.
            The tensor passed by the caller is not modified.
        sample_rate (int): Sample rate forwarded to ``soundfile.write``.
        precision (int, optional): Bits per sample; mapped to the subtype
            string ``"PCM_S8"`` for 8 and ``"PCM_<precision>"`` otherwise.
            (Default: ``16``)
        channels_first (bool, optional): Whether ``src`` is `[C x L]`.
            (Default: ``True``)

    Raises:
        OSError: If the directory that should contain ``filepath`` does not
            exist.
        ValueError: If ``src`` has more than two dimensions or its channel
            dimension is larger than 16.
    """
    ch_idx = 0 if channels_first else 1

    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))

    # validate src (per the original comment: checks that it is a CPU tensor)
    _misc_ops.check_input(src)

    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals. Use the out-of-place
        # `unsqueeze` so the caller's tensor keeps its original shape; the
        # in-place variant silently turned the caller's 1D tensor into 2D.
        src = src.unsqueeze(ch_idx)
    elif src.dim() > 2 or src.size(ch_idx) > 16:
        # NOTE: the comparison accepts exactly 16 channels although the
        # message below says "C < 16"; kept as-is for compatibility.
        raise ValueError(
            "Expected format where C < 16, but found {}".format(src.size()))

    # The data is handed to soundfile as [L x C], so transpose when the
    # input is channels-first.
    if channels_first:
        src = src.t()

    if src.dtype == torch.int64:
        # Soundfile doesn't support int64
        src = src.type(torch.int32)

    subtype = "PCM_S8" if precision == 8 else "PCM_" + str(precision)

    return soundfile.write(filepath, src, sample_rate, subtype)
def load(filepath: str,
         out: Optional[Tensor] = None,
         normalization: bool = True,
         channels_first: bool = True,
         num_frames: int = 0,
         offset: int = 0,
         signalinfo: SignalInfo = None,
         encodinginfo: EncodingInfo = None,
         filetype: Optional[str] = None) -> Tuple[Tensor, int]:
    r"""See torchaudio.load

    Reads ``filepath`` into ``out`` via ``_torchaudio.read_audio_file`` and
    returns the tensor together with the sample rate reported by that call.

    Raises:
        OSError: If ``filepath`` is not an existing regular file.
        ValueError: If ``num_frames`` is below -1 or ``offset`` is negative.
    """
    # Accept `pathlib.Path` as well as `str` by converting up front.
    path = str(filepath)

    # Only an existing regular file is accepted.
    if not os.path.isfile(path):
        raise OSError("{} not found or is a directory".format(path))

    # Allocate the destination tensor, or validate the one supplied.
    if out is None:
        out = torch.FloatTensor()
    else:
        _misc_ops.check_input(out)

    # Range checks on the requested window.
    if num_frames < -1:
        raise ValueError("Expected value for num_samples -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected positive offset value")

    sr = _torchaudio.read_audio_file(
        path,
        out,
        channels_first,
        num_frames,
        offset,
        signalinfo,
        encodinginfo,
        filetype,
    )

    # normalize if needed
    _misc_ops.normalize_audio(out, normalization)

    return out, sr
def save_encinfo(filepath: str,
                 src: Tensor,
                 channels_first: bool = True,
                 signalinfo: Optional[SignalInfo] = None,
                 encodinginfo: Optional[EncodingInfo] = None,
                 filetype: Optional[str] = None) -> None:
    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.

    Args:
        filepath (str): Path to audio file
        src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
            the number of audio frames, C is the number of channels. A 1D tensor is
            treated as a mono signal. The tensor passed by the caller is not modified.
        channels_first (bool, optional): Set channels first or length first in result.
            (Default: ``True``)
        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be
            helpful if the audio type cannot be automatically determined. Its ``rate``
            and ``precision`` attributes are coerced in place to ``float`` and ``int``
            respectively when they hold equal-valued numbers of another type.
            (Default: ``None``)
        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which
            could be set if the audio type cannot be automatically determined.
            (Default: ``None``)
        filetype (str, optional): A filetype or extension to be set if sox cannot
            determine it automatically. Only used when ``filepath`` has no extension;
            otherwise the extension of ``filepath`` takes precedence.
            (Default: ``None``)

    Raises:
        OSError: If the directory that should contain ``filepath`` does not exist.
        ValueError: If ``src`` has more than two dimensions or its channel dimension
            is larger than 16.
        TypeError: If ``signalinfo.rate`` is not losslessly convertible to ``float``
            or ``signalinfo.precision`` is not losslessly convertible to ``int``.

    Example
        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> torchaudio.save('foo.wav', data, sample_rate)
    """
    ch_idx = 0 if channels_first else 1

    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))

    # validate src (per the original comment: checks that it is a CPU tensor)
    _misc_ops.check_input(src)

    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals. Use the out-of-place
        # `unsqueeze` so the caller's tensor keeps its original shape; the
        # in-place variant silently turned the caller's 1D tensor into 2D.
        src = src.unsqueeze(ch_idx)
    elif src.dim() > 2 or src.size(ch_idx) > 16:
        # NOTE: the comparison accepts exactly 16 channels although the
        # message below says "C < 16"; kept as-is for compatibility.
        raise ValueError("Expected format where C < 16, but found {}".format(
            src.size()))

    # sox stores the sample rate as a float, though practically sample rates
    # are almost always integers: convert integers to floats
    if signalinfo:
        if signalinfo.rate and not isinstance(signalinfo.rate, float):
            if float(signalinfo.rate) == signalinfo.rate:
                signalinfo.rate = float(signalinfo.rate)
            else:
                raise TypeError('Sample rate should be a float or int')
        # check if the bit precision (i.e. bits per sample) is an integer
        if signalinfo.precision and not isinstance(signalinfo.precision, int):
            if int(signalinfo.precision) == signalinfo.precision:
                signalinfo.precision = int(signalinfo.precision)
            else:
                raise TypeError('Bit precision should be an integer')

    # programs such as librosa normalize the signal, unnormalize if detected
    if src.min() >= -1.0 and src.max() <= 1.0:
        src = src * (1 << 31)
        src = src.long()

    # set filetype and allow for files with no extensions; an extension on
    # the path overrides the `filetype` argument
    extension = os.path.splitext(filepath)[1]
    filetype = extension[1:] if len(extension) > 0 else filetype

    # transpose from C x L -> L x C
    if channels_first:
        src = src.transpose(1, 0)

    # save data to file
    src = src.contiguous()
    _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)