def forward(self, specgram: Tensor) -> Tensor:
    r"""Apply the mel filter bank stored in ``self.fb`` to a spectrogram.

    Args:
        specgram (Tensor): A spectrogram STFT of dimension (..., freq, time).

    Returns:
        Tensor: Mel frequency spectrogram of size (..., ``n_mels``, time).
    """
    # Fold every leading dimension into one batch axis so matmul sees 3-D input.
    original_shape = specgram.size()
    specgram = specgram.reshape(-1, original_shape[-2], original_shape[-1])

    if self.fb.numel() == 0:
        # ``fb`` is still empty: build the filter bank from the frequency
        # size of this input.
        lazy_fb = F.create_fb_matrix(
            specgram.size(1), self.f_min, self.f_max, self.n_mels,
            self.sample_rate, self.norm, self.mel_scale)
        # Attributes cannot be reassigned outside __init__, so the existing
        # tensor is resized and filled in place instead.
        self.fb.resize_(lazy_fb.size())
        self.fb.copy_(lazy_fb)

    # (channel, frequency, time) -> (channel, time, frequency)
    time_major = specgram.transpose(1, 2)
    # (channel, time, frequency) dot (frequency, n_mels) -> (channel, time, n_mels)
    projected = torch.matmul(time_major, self.fb)
    # -> (channel, n_mels, time)
    mel_specgram = projected.transpose(1, 2)

    # Restore the leading dimensions that were folded above.
    restored_shape = original_shape[:-2] + mel_specgram.shape[-2:]
    return mel_specgram.reshape(restored_shape)
def __init__(self,
             n_stft: int,
             n_mels: int = 128,
             sample_rate: int = 16000,
             f_min: float = 0.,
             f_max: Optional[float] = None,
             max_iter: int = 100000,
             tolerance_loss: float = 1e-5,
             tolerance_change: float = 1e-8,
             sgdargs: Optional[dict] = None) -> None:
    r"""Store the configuration and precompute the mel filter bank.

    Args:
        n_stft (int): First argument passed to ``F.create_fb_matrix``
            (number of STFT frequency bins).
        n_mels (int, optional): Number of mel filter banks. (Default: ``128``)
        sample_rate (int, optional): Sample rate of the audio signal.
            (Default: ``16000``)
        f_min (float, optional): Minimum frequency. (Default: ``0.``)
        f_max (float or None, optional): Maximum frequency. When ``None``,
            ``float(sample_rate // 2)`` is used. (Default: ``None``)
        max_iter (int, optional): Stored as ``self.max_iter``.
            (Default: ``100000``)
        tolerance_loss (float, optional): Stored as ``self.tolerance_loss``.
            (Default: ``1e-5``)
        tolerance_change (float, optional): Stored as
            ``self.tolerance_change``. (Default: ``1e-8``)
        sgdargs (dict or None, optional): Stored as ``self.sgdargs``; when
            ``None`` or empty, ``{'lr': 0.1, 'momentum': 0.9}`` is used.
            Presumably keyword arguments for an SGD optimizer -- confirm
            against the code that consumes it. (Default: ``None``)

    Raises:
        AssertionError: If ``f_min`` is greater than the resolved ``f_max``.
    """
    super(InverseMelScale, self).__init__()
    self.n_mels = n_mels
    self.sample_rate = sample_rate
    # Test against None explicitly: ``f_max or ...`` would treat an explicit
    # ``f_max=0.0`` as "not provided" and silently replace it with Nyquist.
    self.f_max = f_max if f_max is not None else float(sample_rate // 2)
    self.f_min = f_min
    self.max_iter = max_iter
    self.tolerance_loss = tolerance_loss
    self.tolerance_change = tolerance_change
    self.sgdargs = sgdargs or {'lr': 0.1, 'momentum': 0.9}

    assert f_min <= self.f_max, 'Require f_min: %f < f_max: %f' % (
        f_min, self.f_max)

    fb = F.create_fb_matrix(n_stft, self.f_min, self.f_max, self.n_mels,
                            self.sample_rate)
    # Registered as a buffer rather than assigned as a plain attribute.
    self.register_buffer('fb', fb)
def func(_):
    """Return a filter bank built from fixed parameters; the argument is ignored."""
    # Positional order: n_stft, f_min, f_max, n_mels, sample_rate, norm.
    fixed_args = (100, 0.0, 20.0, 10, 16000, "slaney")
    return F.create_fb_matrix(*fixed_args)
def __init__(self, sample_rate=22050, n_fft=2048, n_mels=256, f_min=0., f_max=None):
    """Build the filter bank and store it on ``self.fb``.

    ``f_max`` falls back to ``float(sample_rate // 2)`` when it is ``None``.
    The filter bank is created with ``n_fft // 2 + 1`` as its first argument.
    """
    if f_max is None:
        f_max = float(sample_rate // 2)
    assert f_min <= f_max

    n_freqs = n_fft // 2 + 1
    self.fb = create_fb_matrix(n_freqs, f_min, f_max, n_mels)
def test_torchscript_create_fb_matrix(self):
    """The scripted and eager ``create_fb_matrix`` calls must agree."""

    @torch.jit.script
    def jit_method(n_stft, f_min, f_max, n_mels):
        # type: (int, float, float, int) -> Tensor
        return F.create_fb_matrix(n_stft, f_min, f_max, n_mels)

    # Positional order: n_stft, f_min, f_max, n_mels.
    call_args = (100, 0., 20., 10)

    scripted_result = jit_method(*call_args)
    eager_result = F.create_fb_matrix(*call_args)

    self.assertTrue(torch.allclose(scripted_result, eager_result))
def __init__(self, n_mels=128, sample_rate=16000, f_min=0., f_max=None, n_stft=None):
    """Store the mel-scale configuration and, if possible, the filter bank.

    When ``n_stft`` is ``None`` an empty tensor is stored in ``self.fb``;
    otherwise the filter bank is created immediately.
    """
    super(MelScale, self).__init__()
    self.n_mels = n_mels
    self.sample_rate = sample_rate

    # Fall back to float(sample_rate // 2) when no upper frequency is given.
    if f_max is None:
        f_max = float(sample_rate // 2)
    self.f_max = f_max

    assert f_min <= self.f_max, 'Require f_min: %f < f_max: %f' % (
        f_min, self.f_max)
    self.f_min = f_min

    if n_stft is None:
        fb = torch.empty(0)
    else:
        fb = F.create_fb_matrix(n_stft, self.f_min, self.f_max, self.n_mels)
    self.fb = fb
def _test_create_fb(self, n_mels=40, sample_rate=22050, n_fft=2048, fmin=0.0, fmax=8000.0):
    """Compare ``F.create_fb_matrix`` with ``librosa.filters.mel`` bank by bank."""
    reference = librosa.filters.mel(
        sr=sample_rate, n_fft=n_fft, n_mels=n_mels,
        fmax=fmax, fmin=fmin, htk=True, norm=None)

    n_freqs = n_fft // 2 + 1
    candidate = F.create_fb_matrix(
        sample_rate=sample_rate, n_mels=n_mels,
        f_max=fmax, f_min=fmin, n_freqs=n_freqs)

    # Column i of the torchaudio matrix is compared with row i of the
    # librosa matrix.
    for i_mel_bank in range(n_mels):
        actual = candidate[:, i_mel_bank]
        expected = torch.tensor(reference[i_mel_bank])
        assert torch.allclose(actual, expected, atol=1e-4)
def __init__(self,
             n_mels: int = 128,
             sample_rate: int = 16000,
             f_min: float = 0.,
             f_max: Optional[float] = None,
             n_stft: Optional[int] = None) -> None:
    """Store the mel-scale configuration and register the ``fb`` buffer.

    When ``n_stft`` is ``None`` an empty tensor is registered; otherwise the
    filter bank is created immediately from the stored settings.
    """
    super(MelScale, self).__init__()
    self.n_mels = n_mels
    self.sample_rate = sample_rate

    # Fall back to float(sample_rate // 2) when no upper frequency is given.
    if f_max is None:
        self.f_max = float(sample_rate // 2)
    else:
        self.f_max = f_max
    self.f_min = f_min

    assert f_min <= self.f_max, 'Require f_min: %f < f_max: %f' % (f_min, self.f_max)

    if n_stft is None:
        fb = torch.empty(0)
    else:
        fb = F.create_fb_matrix(
            n_stft, self.f_min, self.f_max, self.n_mels, self.sample_rate)
    self.register_buffer('fb', fb)
def forward(self, specgram):
    r"""Apply the mel filter bank stored in ``self.fb`` to a spectrogram.

    Args:
        specgram (torch.Tensor): A spectrogram STFT of dimension (channel, freq, time)

    Returns:
        torch.Tensor: Mel frequency spectrogram of size (channel, ``n_mels``, time)
    """
    if self.fb.numel() == 0:
        # ``fb`` is still empty: build the filter bank from the frequency
        # size of this input.
        lazy_fb = F.create_fb_matrix(specgram.size(1), self.f_min, self.f_max, self.n_mels)
        # Attributes cannot be reassigned outside __init__, so the existing
        # tensor is resized and filled in place instead.
        self.fb.resize_(lazy_fb.size())
        self.fb.copy_(lazy_fb)

    # (channel, frequency, time) -> (channel, time, frequency)
    time_major = specgram.transpose(1, 2)
    # (channel, time, frequency) dot (frequency, n_mels) -> (channel, time, n_mels)
    projected = torch.matmul(time_major, self.fb)
    # -> (channel, n_mels, time)
    return projected.transpose(1, 2)
def _test_create_fb(self, n_mels=40, sample_rate=22050, n_fft=2048, fmin=0.0, fmax=8000.0):
    """Compare ``F.create_fb_matrix`` with ``librosa.filters.mel`` bank by bank.

    Raises ``unittest.SkipTest`` when ``IMPORT_LIBROSA`` is falsy.
    """
    # Using a decorator here causes parametrize to fail on Python 2,
    # so the skip is raised from inside the body instead.
    if not IMPORT_LIBROSA:
        raise unittest.SkipTest('Librosa is not available')

    reference = librosa.filters.mel(
        sr=sample_rate, n_fft=n_fft, n_mels=n_mels,
        fmax=fmax, fmin=fmin, htk=True, norm=None)

    n_freqs = n_fft // 2 + 1
    candidate = F.create_fb_matrix(
        sample_rate=sample_rate, n_mels=n_mels,
        f_max=fmax, f_min=fmin, n_freqs=n_freqs)

    # Column i of the torchaudio matrix is compared with row i of the
    # librosa matrix.
    for i_mel_bank in range(n_mels):
        actual = candidate[:, i_mel_bank]
        expected = torch.tensor(reference[i_mel_bank])
        assert torch.allclose(actual, expected, atol=1e-4)
def __init__(self,
             n_mels: int = 128,
             sample_rate: int = 16000,
             f_min: float = 0.,
             f_max: Optional[float] = None,
             n_stft: Optional[int] = None,
             norm: Optional[str] = None,
             mel_scale: str = "htk") -> None:
    """Store the mel-scale configuration and register the ``fb`` buffer.

    When ``n_stft`` is ``None`` an empty tensor is registered; otherwise the
    filter bank is created immediately from the stored settings, including
    ``norm`` and ``mel_scale``.
    """
    super(MelScale, self).__init__()
    self.n_mels = n_mels
    self.sample_rate = sample_rate

    # Fall back to float(sample_rate // 2) when no upper frequency is given.
    if f_max is None:
        self.f_max = float(sample_rate // 2)
    else:
        self.f_max = f_max
    self.f_min = f_min
    self.norm = norm
    self.mel_scale = mel_scale

    assert f_min <= self.f_max, 'Require f_min: {} < f_max: {}'.format(f_min, self.f_max)

    if n_stft is None:
        fb = torch.empty(0)
    else:
        fb = F.create_fb_matrix(
            n_stft, self.f_min, self.f_max, self.n_mels,
            self.sample_rate, self.norm, self.mel_scale)
    self.register_buffer('fb', fb)
def jit_method(n_stft, f_min, f_max, n_mels):
    # type: (int, float, float, int) -> Tensor
    # Thin wrapper that forwards its four arguments to F.create_fb_matrix;
    # the type comment above must stay directly under the signature.
    fbank = F.create_fb_matrix(n_stft, f_min, f_max, n_mels)
    return fbank
def test_warning(self):
    """``create_fb_matrix(201, 0, 8000, 128, 16000)`` must emit exactly one warning."""
    # Local import keeps this test self-contained.
    import warnings

    # ``pytest.warns(None)`` is deprecated since pytest 7 and an error in
    # pytest 8; record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as w:
        # Disable the default "once per location" filtering so that every
        # warning raised inside the block is recorded.
        warnings.simplefilter("always")
        F.create_fb_matrix(201, 0, 8000, 128, 16000)
    assert len(w) == 1
def test_no_warning_low_n_mels(self):
    """``create_fb_matrix(201, 0, 8000, 89, 16000)`` must not emit any warning."""
    # Local import keeps this test self-contained.
    import warnings

    # ``pytest.warns(None)`` is deprecated since pytest 7 and an error in
    # pytest 8; record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as w:
        # Disable the default "once per location" filtering so that every
        # warning raised inside the block is recorded.
        warnings.simplefilter("always")
        F.create_fb_matrix(201, 0, 8000, 89, 16000)
    assert len(w) == 0
def test_warning(self):
    """``create_fb_matrix(201, 0, 8000, 128, 16000)`` must emit exactly one warning."""
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning raised inside the block, bypassing the
        # default "once per location" filtering.
        warnings.simplefilter("always")
        F.create_fb_matrix(201, 0, 8000, 128, 16000)
    n_warnings = len(caught)
    assert n_warnings == 1
def test_no_warning_high_n_freq(self):
    """``create_fb_matrix(288, 0, 8000, 128, 16000)`` must not emit any warning."""
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning raised inside the block, bypassing the
        # default "once per location" filtering.
        warnings.simplefilter("always")
        F.create_fb_matrix(288, 0, 8000, 128, 16000)
    n_warnings = len(caught)
    assert n_warnings == 0
def test_create_fb_matrix_no_warning_low_n_mels(self):
    """``create_fb_matrix(201, 0, 8000, 89, 16000)`` must not emit any warning."""
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning raised inside the block, bypassing the
        # default "once per location" filtering.
        warnings.simplefilter("always")
        F.create_fb_matrix(201, 0, 8000, 89, 16000)
    n_warnings = len(caught)
    assert n_warnings == 0
# --------------- # # ``torchaudio.functional.create_fb_matrix`` generates the filter bank # for converting frequency bins to mel-scale bins. # # Since this function does not require input audio/features, there is no # equivalent transform in ``torchaudio.transforms``. # n_fft = 256 n_mels = 64 sample_rate = 6000 mel_filters = F.create_fb_matrix(int(n_fft // 2 + 1), n_mels=n_mels, f_min=0., f_max=sample_rate / 2., sample_rate=sample_rate, norm='slaney') plot_mel_fbank(mel_filters, "Mel Filter Bank - torchaudio") ###################################################################### # Comparison against librosa # ~~~~~~~~~~~~~~~~~~~~~~~~~~ # # For reference, here is the equivalent way to get the mel filter bank # with ``librosa``. # mel_filters_librosa = librosa.filters.mel( sample_rate, n_fft,
def test_no_warning_high_n_freq(self):
    """``create_fb_matrix(288, 0, 8000, 128, 16000)`` must not emit any warning."""
    # Local import keeps this test self-contained.
    import warnings

    # ``pytest.warns(None)`` is deprecated since pytest 7 and an error in
    # pytest 8; record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as w:
        # Disable the default "once per location" filtering so that every
        # warning raised inside the block is recorded.
        warnings.simplefilter("always")
        F.create_fb_matrix(288, 0, 8000, 128, 16000)
    assert len(w) == 0